def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    """Compute the data set for ``table_name`` and pass it to CSVUtil as 'data.csv'.

    Steps, in order: load events through ``Storage``, keep only the events
    accepted by ``FilterUtil.only_valid_events``, compute the attributes with
    ``AttributeUtil.training_data_without_opposite``, shuffle the result with
    a seed derived from its length and hand it to
    ``CSVUtil.create_csv_file`` under the name 'data.csv'.

    :param events_file: forwarded unchanged to ``Storage``.
    :param no_event_time_shift: forwarded unchanged to ``Storage``.
    :param table_name: forwarded to ``Storage``, to the row selector and to
        the attribute computation.

    NOTE(review): ``func`` is not defined in this function; it is resolved
    from the enclosing module scope -- confirm it is set before the call.
    """
    logging.info('start')

    # download the data
    connection = ConnectionUtil.create_con()
    loader = Storage(events_file, no_event_time_shift, table_name)
    # NOTE(review): the meaning of the two zeros is not visible here.
    events = loader.load_data(connection, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(events))

    # apply the filters to the events
    valid_events = FilterUtil.only_valid_events(events)

    # for travis: cap the number of processed events on a testable system
    if ConnectionUtil.is_testable_system():
        limit = ConnectionUtil.MAX_TESTABLE_EVENTS
        valid_events = valid_events[:limit]

    logging.info('events after applying the filter: %d' % len(valid_events))

    # selectors for the data; no interval selector is used in this variant
    row_selector = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)
    interval_selector = None

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        connection,
        table_name,
        valid_events,
        func,
        row_selector,
        interval_selector,
    )
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # file generation
    logging.info('start preparing file of training and testing set')
    # The seed depends only on the data set size, so the shuffle is
    # reproducible for a data set of the same length.
    shuffle_seed = len(data) // 2
    random.seed(shuffle_seed)
    random.shuffle(data)
    CSVUtil.create_csv_file(data, 'data.csv')
    logging.info('end preparing file of training and testing set')

    logging.info('end')
def main(events_file: str, no_event_time_shift: int):
    """Compute the 'measured_klarka' data set and run every train/test step.

    Steps, in order: load events through ``Storage``, keep only the events
    accepted by ``FilterUtil.only_valid_events``, compute the attributes with
    ``AttributeUtil.training_data_without_opposite``, shuffle the result with
    a seed derived from its length, split it with
    ``training_testing_data(data, 0.7)`` and then call the
    ``training_testing_data_*`` helpers seven times, each time on fresh deep
    copies of the training and testing sets.

    :param events_file: forwarded unchanged to ``Storage``.
    :param no_event_time_shift: forwarded unchanged to ``Storage``.

    NOTE(review): ``func`` is not defined in this function; it is resolved
    from the enclosing module scope -- confirm it is set before the call.
    """
    logging.info('start')

    table_name = 'measured_klarka'

    # download data
    connection = ConnectionUtil.create_con()
    loader = Storage(events_file, no_event_time_shift, table_name)
    # NOTE(review): the meaning of the two zeros is not visible here.
    events = loader.load_data(connection, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(events))

    # apply filters to data
    valid_events = FilterUtil.only_valid_events(events)
    # Disabled alternatives, kept for reference:
    # FilterUtil.temperature_diff(valid_events, lo, hi) with (lo, hi) in
    # (5, 17.5), (17.5, 30), (5, 13.3), (13.3, 21.6), (21.6, 30),
    # (10, 15), (15, 20), (20, 25).
    logging.info('events after applying the filter: %d' % len(valid_events))

    row_selector = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(connection, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        connection,
        table_name,
        valid_events,
        func,
        row_selector,
        interval_selector,
    )
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # split data set into training and testing set
    # The seed depends only on the data set size, so the shuffle is
    # reproducible for a data set of the same length.
    shuffle_seed = len(data) // 2
    random.seed(shuffle_seed)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)

    training_summary = (len(training), minimum)
    logging.info('training set contains %d records, each %d-krat' % training_summary)
    logging.info('testing set contains %d records' % len(testing))

    # One row per step: (helper, third positional argument, slope class, prefix).
    # The slope class is instantiated inside the loop so every call still
    # receives its own new object, as in the unrolled form.
    # NOTE(review): "trendline_" and "center_" both use CenterLineSlope --
    # confirm this is intended.
    steps = (
        (training_testing_data_with_distance, 0, CenterLineSlope, "trendline_"),
        (training_testing_data_with_distance, 1, PolyfitLineAvgSlope, "polyfit_"),
        (training_testing_data_with_distance, 2, CenterLineSlope, "center_"),
        (training_testing_data_only_distance, 3, CenterLineSlope, "trendline_"),
        (training_testing_data_only_distance, 4, PolyfitLineAvgSlope, "polyfit_"),
        (training_testing_data_only_distance, 5, CenterLineSlope, "center_"),
        (training_testing_data_without_distance, 6, CenterLineSlope, "trendline_"),
    )
    for step, step_index, slope_class, prefix in steps:
        # Deep copies keep each step independent of whatever the previous
        # step did to the lists it was given.
        step(
            copy.deepcopy(training),
            copy.deepcopy(testing),
            step_index,
            slope_class(),
            prefix,
            False,
            False,
            False,
            False,
        )

    logging.info('end')