Esempio n. 1
0
def training_set(events_file: str, no_event_time_shift: int, table_name: str,
                 directory):
    """Build a balanced training set and write it to a CSV file.

    The events are loaded through ``Storage`` (column ``'co2_in_ppm'``),
    reduced to the valid ones and converted to training records with
    ``AttributeUtil.cached_training_data``. Extra records are derived from
    the module-level ``no_events_records`` with
    ``AttributeUtil.additional_training_set``. Both sets are combined by
    ``AttributeUtil.balance_set`` and written to
    ``<directory>/training.csv``. A histogram of the training events is
    generated along the way through ``GraphUtil.gen_duration_histogram``.

    The function relies on the module-level names ``func`` and
    ``no_events_records``.

    Args:
        events_file: Forwarded to ``Storage``.
        no_event_time_shift: Forwarded to ``Storage``.
        table_name: Table name handed to ``Storage``, the row selector and
            the ``AttributeUtil`` calls.
        directory: Prefix of the output paths; ``training_cached.csv`` and
            ``training.csv`` are placed under it.
    """
    logging.info('start')

    # download the data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    events = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d', len(events))

    # apply the filters to the events
    filtered = FilterUtil.only_valid_events(events)

    # On a testable system (e.g. Travis CI) cap the amount of processed data.
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d', len(filtered))

    # selectors for the data; no interval selector is used in this variant
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # training set
    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.cached_training_data(
        con, table_name, filtered, func, row_selector, interval_selector,
        'open', '{0}/training_cached.csv'.format(directory))
    count = len(training)
    # Integer division keeps the event count an int for the %d placeholder.
    logging.info('training set contains %d events (%d records)',
                 count // 2, count)

    # Bin edges 5, 10, ..., 55 for the histogram.
    GraphUtil.gen_duration_histogram(tr_events, 'save', ['png'],
                                     'Histogram dlzok vetrania',
                                     list(range(5, 60, 5)), 1)

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    logging.info('additional training set contains %d records',
                 len(training2))

    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, '{0}/training.csv'.format(directory))
    logging.info('end preparing file of training set')
Esempio n. 2
0
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    """Build a balanced training set and write it to ``training.csv``.

    The events are loaded through ``Storage`` (column
    ``'rh_in2_specific_g_kg'``), reduced to the valid ones and converted to
    training records with ``AttributeUtil.training_data``. Extra records are
    derived from the module-level ``no_events_records`` with
    ``AttributeUtil.additional_training_set``. Both sets are combined by
    ``AttributeUtil.balance_set`` and written to ``training.csv`` (a relative
    path).

    The function relies on the module-level names ``func`` and
    ``no_events_records``.

    Args:
        events_file: Forwarded to ``Storage``.
        no_event_time_shift: Forwarded to ``Storage``.
        table_name: Table name handed to ``Storage``, both selectors and the
            ``AttributeUtil`` calls.
    """
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    events = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d', len(events))

    # apply filters to data; the extra filters below are left disabled
    filtered = FilterUtil.only_valid_events(events)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 100)
    # filtered = FilterUtil.temperature_out_max(filtered, 15)
    # filtered = FilterUtil.humidity(filtered, 6, 1.6, 100)

    # On a testable system (e.g. Travis CI) cap the amount of processed data.
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d', len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(con, table_name)

    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.training_data(con, table_name,
                                                      filtered, func,
                                                      row_selector,
                                                      interval_selector,
                                                      'open')
    count = len(training)
    # Integer division keeps the event count an int for the %d placeholder.
    logging.info('training set contains %d events (%d records)',
                 count // 2, count)

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    logging.info('additional training set contains %d records',
                 len(training2))

    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, 'training.csv')
    logging.info('end preparing file of training set')
Esempio n. 3
0
def testing_set(table_name: str, start, end, filename):
    """Compute a testing set for ``table_name`` and log its size.

    Opens a connection and delegates the work to
    ``AttributeUtil.testing_data_with_write``, which receives ``start``,
    ``end`` and ``filename`` unchanged together with the module-level
    ``func``. The returned value is logged as the record count.

    Args:
        table_name: Table name forwarded to ``AttributeUtil``.
        start: Forwarded as-is (presumably the interval start; confirm
            against ``testing_data_with_write``).
        end: Forwarded as-is (presumably the interval end; confirm).
        filename: Forwarded as-is (presumably the output file; confirm).
    """
    logging.info('start')

    connection = ConnectionUtil.create_con()

    logging.info('start computing of testing set')
    # NOTE(review): the meaning of the literal 30 and of the two None
    # arguments is defined by testing_data_with_write, not visible here.
    record_count = AttributeUtil.testing_data_with_write(
        connection, table_name, start, end, 30, func, None, None, 'open',
        filename)
    summary = 'testing set contains %d records' % record_count
    logging.info(summary)
    logging.info('end computing of testing set')

    logging.info('end')
Esempio n. 4
0
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    """Build a shuffled data set from the valid events and save ``data.csv``.

    The events are loaded through ``Storage`` (column ``'co2_in_ppm'``),
    reduced to the valid ones and converted to records with
    ``AttributeUtil.training_data_without_opposite``. The records are
    shuffled in place and written to ``data.csv`` (a relative path). The
    global ``random`` generator is seeded with half the record count first,
    so the order is reproducible for a given record count.

    The function relies on the module-level name ``func``.

    Args:
        events_file: Forwarded to ``Storage``.
        no_event_time_shift: Forwarded to ``Storage``.
        table_name: Table name handed to ``Storage``, the row selector and
            ``AttributeUtil``.
    """
    logging.info('start')

    # download the data
    connection = ConnectionUtil.create_con()
    events = Storage(events_file, no_event_time_shift,
                     table_name).load_data(connection, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(events))

    # apply the filters to the events
    valid_events = FilterUtil.only_valid_events(events)

    # On a testable system (e.g. Travis CI) cap the number of events.
    if ConnectionUtil.is_testable_system():
        valid_events = valid_events[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(valid_events))

    # row selector for the data; no interval selector is used (None below)
    selector = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)

    # data set
    logging.info('start computing of data set')
    records = AttributeUtil.training_data_without_opposite(
        connection, table_name, valid_events, func, selector, None)
    logging.info('data set contains %d events' % len(records))
    logging.info('end computing of data set')

    # generate the files
    logging.info('start preparing file of training and testing set')
    shuffle_seed = len(records) // 2
    random.seed(shuffle_seed)
    random.shuffle(records)

    CSVUtil.create_csv_file(records, 'data.csv')
    logging.info('end preparing file of training and testing set')

    logging.info('end')
Esempio n. 5
0
def main(events_file: str, no_event_time_shift: int):
    """Split the ``measured_klarka`` data set and run seven experiments.

    The events are loaded through ``Storage`` (column
    ``'rh_in2_specific_g_kg'``), reduced to the valid ones and converted to
    records with ``AttributeUtil.training_data_without_opposite``. The
    records are shuffled with a seed derived from their count and split by
    ``training_testing_data(data, 0.7)``. Each experiment then receives its
    own deep copies of the training and testing sets, its index (0-6), a
    fresh slope object, a prefix string and four ``False`` flags.

    The function relies on the module-level name ``func``.

    Args:
        events_file: Forwarded to ``Storage``.
        no_event_time_shift: Forwarded to ``Storage``.
    """
    logging.info('start')

    table_name = 'measured_klarka'

    # download data
    connection = ConnectionUtil.create_con()
    events = Storage(events_file, no_event_time_shift,
                     table_name).load_data(connection, 0, 0,
                                           'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(events))

    # apply filters to data
    valid_events = FilterUtil.only_valid_events(events)
    # Disabled narrowing by temperature difference; the (low, high) ranges
    # for FilterUtil.temperature_diff(valid_events, low, high) were:
    #   (5, 17.5), (17.5, 30), (5, 13.3), (13.3, 21.6), (21.6, 30),
    #   (10, 15), (15, 20), (20, 25)
    logging.info('events after applying the filter: %d' % len(valid_events))

    row_selector = CachedDiffRowWithIntervalSelector(connection, table_name,
                                                     0, 0)
    interval_selector = SimpleIntervalSelector(connection, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        connection, table_name, valid_events, func, row_selector,
        interval_selector)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # split data set into training and testing set
    shuffle_seed = len(data) // 2
    random.seed(shuffle_seed)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)

    logging.info('training set contains %d records, each %d-krat' %
                 (len(training), minimum))
    logging.info('testing set contains %d records' % len(testing))

    # One row per experiment, in execution order; the position in the table
    # is the index passed to the experiment function.
    # NOTE(review): "trendline_" and "center_" both use CenterLineSlope;
    # confirm that this is intended.
    with_distance = training_testing_data_with_distance
    only_distance = training_testing_data_only_distance
    without_distance = training_testing_data_without_distance
    experiments = (
        (with_distance, CenterLineSlope, "trendline_"),
        (with_distance, PolyfitLineAvgSlope, "polyfit_"),
        (with_distance, CenterLineSlope, "center_"),
        (only_distance, CenterLineSlope, "trendline_"),
        (only_distance, PolyfitLineAvgSlope, "polyfit_"),
        (only_distance, CenterLineSlope, "center_"),
        (without_distance, CenterLineSlope, "trendline_"),
    )
    for index, (run, slope_cls, prefix) in enumerate(experiments):
        # Deep copies keep each experiment from seeing another's changes.
        run(copy.deepcopy(training), copy.deepcopy(testing), index,
            slope_cls(), prefix, False, False, False, False)

    logging.info('end')