def training_set(events_file: str, no_event_time_shift: int, table_name: str,
                 directory):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for travis
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # row selector for the data
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # training set
    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.cached_training_data(
        con, table_name, filtered, func, row_selector, interval_selector,
        'open', '{0}/training_cached.csv'.format(directory))
    count = len(training)
    logging.info('training set contains %d events (%d records)'
                 % (count // 2, count))

    GraphUtil.gen_duration_histogram(tr_events, 'save', ['png'],
                                     'Histogram of ventilation durations',
                                     [x for x in range(5, 60, 5)], 1)

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    count2 = len(training2)
    logging.info('additional training set contains %d records' % count2)
    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, '{0}/training.csv'.format(directory))
    logging.info('end preparing file of training set')
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to data
    filtered = FilterUtil.only_valid_events(d)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 100)
    # filtered = FilterUtil.temperature_out_max(filtered, 15)
    # filtered = FilterUtil.humidity(filtered, 6, 1.6, 100)

    # for travis
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(con, table_name)

    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.training_data(con, table_name,
                                                      filtered, func,
                                                      row_selector,
                                                      interval_selector, 'open')
    count = len(training)
    logging.info('training set contains %d events (%d records)'
                 % (count // 2, count))

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    count2 = len(training2)
    logging.info('additional training set contains %d records' % count2)
    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, 'training.csv')
    logging.info('end preparing file of training set')
def test_db_connection():
    # open a connection and log the database version as a smoke test
    con = ConnectionUtil.create_con()
    cur = con.cursor()
    cur.execute('SELECT VERSION()')
    res = cur.fetchone()
    logging.info(res)
def testing_set(table_name: str, start, end, filename):
    logging.info('start')

    con = ConnectionUtil.create_con()

    logging.info('start computing of testing set')
    length = AttributeUtil.testing_data_with_write(con, table_name, start, end,
                                                   30, func, None, None,
                                                   'open', filename)
    logging.info('testing set contains %d records' % length)
    logging.info('end computing of testing set')

    logging.info('end')
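# A minimal sketch of how these entry points might be chained together in a
# __main__ block, using the three-argument variant of training_set shown
# above. The events file, table name, timestamp range and output file name
# below are hypothetical examples, not values confirmed by this module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')

    test_db_connection()
    training_set('examples/events.json', 0, 'measured_example')
    # start/end are hypothetical UTC timestamps delimiting the testing interval
    testing_set('measured_example', 1541030460, 1541116860, 'testing.csv')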
def generate_file(con, start_shift, end_shift, output_file, enable_regression):
    logging.info('start: ' + output_file)
    graphs = Graph("./../../src/graph")

    # download data
    storage = Storage('examples/events_peto.json', 0, 'measured_filtered_peto')
    d = storage.load_data(con, start_shift, end_shift, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # compute the regression
    if enable_regression:
        filtered = compute_regression(filtered)

    logging.info('start generating graphs')
    gr = []
    for event in filtered:
        t = DateTimeUtil.utc_timestamp_to_str(event['e_start']['timestamp'],
                                              '%d.%m. %H:%M:%S')
        t += ' - '
        t += DateTimeUtil.utc_timestamp_to_str(event['e_end']['timestamp'],
                                               '%H:%M:%S')

        if enable_regression:
            gg = [
                Graph.db_to_simple_graph(event, 'co2_in_ppm', 'green',
                                         'CO2', 50),
                Graph.db_to_simple_graph(event, 'co2_in_ppm_exp', 'red',
                                         'SimpleExpRegression', 50),
                Graph.db_to_simple_graph(event, 'co2_in_ppm_exp2', 'orange',
                                         'ExpRegressionWithDelay', 50),
            ]
        else:
            gg = [
                Graph.db_to_simple_graph(event, 'co2_in_ppm', 'green',
                                         'CO2', 50),
            ]

        g = {
            'title': t,
            'graphs': gg
        }
        gr.append(g)

    graphs.gen(gr, output_file + '.html', 0, 0)
    logging.info('end generating graphs')
    logging.info('end')
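# `compute_regression` is referenced above but not shown in this excerpt. A
# minimal sketch of what such a helper could look like, assuming each event
# stores its CO2 series under event['measured']['co2_in_ppm'] and that the
# fitted series should be stored under 'co2_in_ppm_exp'; both assumptions are
# illustrative only and not confirmed by this file.
import numpy as np
from scipy.optimize import curve_fit

def compute_regression(events):
    def exp_decay(x, a, b, c):
        # simple exponential decay model a * e^(-b*x) + c
        return a * np.exp(-b * x) + c

    for event in events:
        values = np.array(event['measured']['co2_in_ppm'], dtype=float)
        x = np.arange(len(values))
        params, _ = curve_fit(exp_decay, x, values, maxfev=10000)
        event['measured']['co2_in_ppm_exp'] = list(exp_decay(x, *params))
    return events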
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # row selector for the data
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        con, table_name, filtered, func, row_selector, interval_selector)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # file generation
    logging.info('start preparing file of training and testing set')
    # seed derived from the data size so the shuffle is reproducible
    random.seed(len(data) // 2)
    random.shuffle(data)
    CSVUtil.create_csv_file(data, 'data.csv')
    logging.info('end preparing file of training and testing set')

    logging.info('end')
def list_of_processes(directory='ventilation_length'):
    return [
        '//DIP/{0}/RandomForest'.format(directory),
        '//DIP/{0}/NeuralNet'.format(directory),
    ]


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.info('start')

    launcher = ConnectionUtil.rapid_miner()['launcher']

    out_s_maskou = []
    out_s_exp = []
    all = []

    for proc in list_of_processes():
        proc_name = proc.split('/')[-1]

        row_klasicky = [('nazov', proc_name), ('pristup', 'klasicky')]
        row_s_maskou = [('nazov', proc_name), ('pristup', 's_maskou')]
        row_s_exp = [('nazov', proc_name), ('pristup', 's_exp')]

        for bin in range(2, 33):
            cmd = [
                launcher,
                proc,
    title = 'window_size:{0},threshold:{1}'.format(window_size, threshold)
    gen_graph(events_delays, action, extensions, title)
    logger.disabled = False


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.info('start')

    table_name = 'measured_filtered_peto'

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage('examples/events_peto.json', 0, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    extensions = ['eps']
    delays(filtered, extensions, ['save'], 11, 15)
def main(events_file: str, owner: str, start_shift: int, end_shift: int,
         output_filename: str, number_output_records: int):
    """
    :param events_file: path to file containing a list of events
    :param owner: sensor owner (klarka|peto), name must be the same as in the database
    :param start_shift: shift of the beginning of data downloading
    :param end_shift: shift of the end of data downloading
    :param output_filename: filename to store a graph
    :param number_output_records: number of points that are required in the graph
    :return:
    """
    logging.info('start: ' + output_filename)
    graphs = Graph("./../../src/graph")

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, 0, 'measured_' + owner)
    d = storage.load_data(con, start_shift, end_shift,
                          'temperature_in_celsius')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the downloaded data
    filtered = FilterUtil.only_valid_events(d)
    filtered = FilterUtil.temperature_diff(filtered, 5, 100)
    filtered = FilterUtil.temperature_out_max(filtered, 15)
    filtered = FilterUtil.humidity(filtered, 6, 1.6, 100)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    if owner == 'klarka':
        filtered = FilterUtil.attribute(filtered, 'window', 'dokoran')

    logging.info('events after applying the filter: %d' % len(filtered))

    # data for graph generation measured using sensor 1
    sensor1_events = filtered
    logging.info('event count: %d for sensor 1' % len(sensor1_events))

    # data for graph generation measured using sensor 2
    sensor2 = [
        'rh_in2_percentage',
        'rh_in2_specific_g_kg',
        'rh_in2_absolute_g_m3'
    ]
    sensor2_events = FilterUtil.measured_values_not_empty(filtered, sensor2)
    logging.info('event count: %d for sensor 2' % len(sensor2_events))

    # graph generation - sensor 1
    logging.info('start generating graphs of events from sensor 1')
    graphs_sensor_1 = []
    for event in sensor1_events:
        graphs_sensor_1 += generate_graphs_sensor_1(event, owner,
                                                    number_output_records)

    graphs.gen(graphs_sensor_1, 'sensor1_' + output_filename, 0, 0)
    logging.info('end generating graphs of events from sensor 1')

    # graph generation - sensor 2
    logging.info('start generating graphs of events from sensor 2')
    graphs_sensor_2 = []
    for event in sensor2_events:
        graphs_sensor_2 += generate_graphs_sensor_2(event, owner,
                                                    number_output_records)

    graphs.gen(graphs_sensor_2, 'sensor2_' + output_filename, 0, 0)
    logging.info('end generating graphs of events from sensor 2')

    logging.info('end')
def generate_file(con, start_shift, end_shift, output_file):
    logging.info('start: ' + output_file)
    graphs = Graph("./../../src/graph")

    # download data
    storage = Storage('examples/events_klarka_shower.json', 0,
                      'measured_klarka_shower')
    d = storage.load_data(con, start_shift, end_shift,
                          'temperature_in_celsius')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to events
    filtered = FilterUtil.only_valid_events(d)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    fill_start_end(filtered)

    logging.info('start generating graphs')
    gr = []
    for event in filtered:
        t = DateTimeUtil.utc_timestamp_to_str(event['e_start']['timestamp'],
                                              '%d.%m. %H:%M:%S')
        t += ' - '
        t += DateTimeUtil.utc_timestamp_to_str(event['e_end']['timestamp'],
                                               '%H:%M:%S')

        g = {
            'title': t,
            'group': 'one',
            'graphs': [
                Graph.db_to_simple_graph(event, 'temperature_in_celsius',
                                         'blue', 'Temperature', 75),
                Graph.db_to_simple_graph(event, 'open_close', 'orange',
                                         'Open', 75),
            ]
        }
        gr.append(g)

        g = {
            'title': t,
            'group': 'two',
            'graphs': [
                Graph.db_to_simple_graph(event, 'rh_in_percentage', 'red',
                                         'Relative humidity [%]', 75),
                Graph.db_to_simple_graph(event, 'open_close', 'orange',
                                         'Open', 75),
            ]
        }
        gr.append(g)

        g = {
            'title': t,
            'group': 'three',
            'graphs': [
                Graph.db_to_simple_graph(event, 'rh_in_absolute_g_m3', 'green',
                                         'Absolute humidity [g/m3]', 75),
                Graph.db_to_simple_graph(event, 'open_close', 'orange',
                                         'Open', 75),
            ]
        }
        gr.append(g)

        g = {
            'title': t,
            'group': 'four',
            'graphs': [
                Graph.db_to_simple_graph(event, 'rh_in_specific_g_kg',
                                         'purple', 'Specific humidity [g/kg]',
                                         75),
                Graph.db_to_simple_graph(event, 'open_close', 'orange',
                                         'Open', 75),
            ]
        }
        gr.append(g)

    graphs.gen(gr, output_file + '.html', 0, 0, global_range=True)
    logging.info('end generating graphs')
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')

    con = ConnectionUtil.create_con()

    generate_file(con, 0, 0, 'shower')
    generate_file(con, -900, +900, 'shower_with_shift')
def main(events_file: str, no_event_time_shift: int):
    logging.info('start')

    table_name = 'measured_klarka'

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to data
    filtered = FilterUtil.only_valid_events(d)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 17.5)
    # filtered = FilterUtil.temperature_diff(filtered, 17.5, 30)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 13.3)
    # filtered = FilterUtil.temperature_diff(filtered, 13.3, 21.6)
    # filtered = FilterUtil.temperature_diff(filtered, 21.6, 30)
    # filtered = FilterUtil.temperature_diff(filtered, 10, 15)
    # filtered = FilterUtil.temperature_diff(filtered, 15, 20)
    # filtered = FilterUtil.temperature_diff(filtered, 20, 25)

    logging.info('events after applying the filter: %d' % len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(con, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        con, table_name, filtered, func, row_selector, interval_selector)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # split the data set into a training and a testing set
    random.seed(len(data) // 2)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)
    logging.info('training set contains %d records, each %d times'
                 % (len(training), minimum))
    logging.info('testing set contains %d records' % len(testing))

    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 0,
                                        CenterLineSlope(), "trendline_",
                                        False, False, False, False)
    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 1,
                                        PolyfitLineAvgSlope(), "polyfit_",
                                        False, False, False, False)
    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 2,
                                        CenterLineSlope(), "center_",
                                        False, False, False, False)

    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 3,
                                        CenterLineSlope(), "trendline_",
                                        False, False, False, False)
    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 4,
                                        PolyfitLineAvgSlope(), "polyfit_",
                                        False, False, False, False)
    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 5,
                                        CenterLineSlope(), "center_",
                                        False, False, False, False)

    training_testing_data_without_distance(copy.deepcopy(training),
                                           copy.deepcopy(testing), 6,
                                           CenterLineSlope(), "trendline_",
                                           False, False, False, False)

    logging.info('end')
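# `training_testing_data` is called above but not defined in this excerpt. A
# minimal sketch of a 70/30 split with the same call signature; the meaning of
# the third return value (`minimum`) is an assumption here, taken to be the
# size of the smaller of the two parts, which this excerpt does not confirm.
def training_testing_data(data, ratio: float):
    boundary = int(len(data) * ratio)
    training = data[:boundary]
    testing = data[boundary:]
    minimum = min(len(training), len(testing))
    return training, testing, minimum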
def main(events_file: str, start_shift: int, end_shift: int,
         output_filename: str, output_records: int):
    logging.info('start')
    graphs = Graph("./../../src/graph")

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, 0, 'measured_klarka')
    d = storage.load_data(con, start_shift, end_shift,
                          'temperature_in_celsius')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to events
    filtered = FilterUtil.only_valid_events(d)
    filtered = FilterUtil.temperature_diff(filtered, 5, 100)
    filtered = FilterUtil.temperature_out_max(filtered, 15)
    filtered = FilterUtil.humidity(filtered, 6, 1.6, 100)

    min_timestamp = int(
        DateTimeUtil.local_time_str_to_utc('2018/11/01 00:01:00').timestamp())
    filtered = FilterUtil.min_timestamp(filtered, min_timestamp)
    filtered = FilterUtil.min_max_time_interval(filtered, 1440, 1620)

    # for travis
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # data for graph generation measured using sensor 1
    sensor1_events = filtered
    logging.info('event count: %d for sensor 1' % len(sensor1_events))

    linear_reg(sensor1_events, 'rh_in_specific_g_kg', 'linear1_sh')
    linear_reg(sensor1_events, 'rh_in_absolute_g_m3', 'linear1_ah')
    linear_reg(sensor1_events, 'temperature_in_celsius', 'linear1_temp')

    # graph generation - sensor 1
    logging.info('start generating graphs of events from sensor 1')
    graphs_sensor_1 = []
    for event in sensor1_events:
        graphs_sensor_1 += gen_graphs(event, output_records, [
            'rh_in_specific_g_kg', 'rh_in_absolute_g_m3',
            'temperature_in_celsius'
        ], ['linear1_sh', 'linear1_ah', 'linear1_temp'])

    graphs.gen(graphs_sensor_1, 'sensor1_' + output_filename, 0, 0,
               global_range=True)
    logging.info('end generating graphs of events from sensor 1')

    # data for graph generation measured using sensor 2
    sensor2_events = filtered
    logging.info('event count: %d for sensor 2' % len(sensor2_events))

    sensor2_events = FilterUtil.measured_values_not_empty(
        sensor2_events, 'rh_in2_specific_g_kg')
    sensor2_events = FilterUtil.measured_values_not_empty(
        sensor2_events, 'rh_in2_absolute_g_m3')
    sensor2_events = FilterUtil.measured_values_not_empty(
        sensor2_events, 'temperature_in2_celsius')
    logging.info('events after applying the filter: %d' % len(sensor2_events))

    linear_reg(sensor2_events, 'rh_in2_specific_g_kg', 'linear2_sh')
    linear_reg(sensor2_events, 'rh_in2_absolute_g_m3', 'linear2_ah')
    linear_reg(sensor2_events, 'temperature_in2_celsius', 'linear2_temp')

    humidity_info_csv(sensor2_events, start_shift, end_shift)

    # graph generation - sensor 2
    logging.info('start generating graphs of events from sensor 2')
    graphs_sensor_2 = []
    for event in sensor2_events:
        graphs_sensor_2 += gen_graphs(event, output_records, [
            'rh_in2_specific_g_kg', 'rh_in2_absolute_g_m3',
            'temperature_in2_celsius'
        ], ['linear2_sh', 'linear2_ah', 'linear2_temp'])

    graphs.gen(graphs_sensor_2, 'sensor2_' + output_filename, 0, 0,
               global_range=True)
    logging.info('end generating graphs of events from sensor 2')

    logging.info('end')
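# `linear_reg` is used above but not defined in this excerpt. A minimal sketch
# of what such a helper could look like, assuming each event stores its
# measured series under event['measured'][attr_name] and that the fitted line
# should be stored back under the output attribute name; both assumptions are
# illustrative, not confirmed by this file.
import numpy as np

def linear_reg(events, attr_name: str, output_attr_name: str):
    for event in events:
        values = event['measured'][attr_name]
        x = np.arange(len(values))
        # fit a first-degree polynomial (straight line) to the series
        slope, intercept = np.polyfit(x, values, 1)
        event['measured'][output_attr_name] = list(slope * x + intercept)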