Example #1
def testing_data_with_write(con,
                            table_name,
                            start,
                            end,
                            write_each,
                            func,
                            row_selector,
                            interval_selector,
                            event_type,
                            output_filename,
                            row_count=2048,
                            log_every_hour=1):
    """Generate testing data, continuously writing it to a file in chunks.

    :param con: database connection
    :param table_name: table name
    :param start: beginning of the interval from which testing data is generated
    :param end: end of the interval to which testing data is generated
    :param write_each: spacing between generated records
    :param func: function that computes the attributes of a record
    :param row_selector: selector used to read rows; if None, a cached
        selector is created for each chunk and cleared after it is processed
    :param interval_selector: selector used to read intervals
    :param event_type: event type (open or close)
    :param output_filename: file where the data is stored
    :param row_count: number of rows that are written to the file together
    :param log_every_hour: how often progress is logged
    :return: number of generated records
    """

    step = row_count * write_each
    records = 0

    # start with a fresh output file
    if os.path.isfile(output_filename):
        os.remove(output_filename)

    last_timestamp = start
    for timestamp in range(start + step, end + step, step):
        # clamp the final chunk to the end of the interval
        timestamp = min(timestamp, end)

        if row_selector is None:
            selector = CachedDiffRowWithIntervalSelector(
                con, table_name, last_timestamp, timestamp)
        else:
            selector = row_selector

        tr = AttributeUtil.testing_data(con, table_name, last_timestamp,
                                        timestamp, write_each, func,
                                        selector, interval_selector,
                                        event_type, log_every_hour)
        CSVUtil.create_csv_file(tr, output_filename, enable_append=True)
        last_timestamp = timestamp
        records += len(tr)

        # the per-chunk cached selector is no longer needed
        if row_selector is None:
            selector.clear()

    return records
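
A minimal usage sketch for testing_data_with_write, reusing the project's ConnectionUtil helper and the table name from Example #5; the timestamps, spacing, and compute_attributes function are hypothetical placeholders, not part of the example above.

# Hypothetical invocation; the timestamps, write_each value and
# compute_attributes function are assumed, not from the project.
con = ConnectionUtil.create_con()
records = testing_data_with_write(con,
                                  table_name='measured_klarka',
                                  start=1514764800,
                                  end=1514851200,
                                  write_each=30,
                                  func=compute_attributes,
                                  row_selector=None,
                                  interval_selector=None,
                                  event_type='open',
                                  output_filename='testing.csv')
logging.info('generated %d records' % records)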
Example #2
def training_set(events_file: str, no_event_time_shift: int, table_name: str,
                 directory):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for Travis CI
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # selector for the data
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # training set
    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.cached_training_data(
        con, table_name, filtered, func, row_selector, interval_selector,
        'open', '{0}/training_cached.csv'.format(directory))
    count = len(training)
    logging.info('training set contains %d events (%d records)' %
                 (count // 2, count))

    GraphUtil.gen_duration_histogram(tr_events, 'save', ['png'],
                                     'Histogram of ventilation durations',
                                     [x for x in range(5, 60, 5)], 1)

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    count2 = len(training2)
    logging.info('additional training set contains %d records' % count2)

    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, '{0}/training.csv'.format(directory))
    logging.info('end preparing file of training set')
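
For intuition, a minimal sketch of what a balancing step such as AttributeUtil.balance_set might do; this assumes it simply equalizes the contribution of the two sets, which is an illustration of the idea rather than the project's actual implementation.

def balance_set_sketch(training, training2):
    # Hypothetical: trim both lists to the size of the smaller one so
    # that event and no-event records are equally represented.
    n = min(len(training), len(training2))
    return training[:n] + training2[:n]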
Example #3
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to data
    filtered = FilterUtil.only_valid_events(d)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 100)
    # filtered = FilterUtil.temperature_out_max(filtered, 15)
    # filtered = FilterUtil.humidity(filtered, 6, 1.6, 100)

    # for Travis CI
    no_ev_records = no_events_records
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]
        no_ev_records = no_events_records[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(con, table_name)

    logging.info('start computing of training set')
    training, tr_events = AttributeUtil.training_data(con, table_name,
                                                      filtered, func,
                                                      row_selector,
                                                      interval_selector,
                                                      'open')
    count = len(training)
    logging.info('training set contains %d events (%d records)' %
                 (count // 2, count))

    training2 = AttributeUtil.additional_training_set(con, table_name,
                                                      no_ev_records, func,
                                                      row_selector,
                                                      interval_selector)
    count2 = len(training2)
    logging.info('additional training set contains %d records' % count2)

    logging.info('end computing of training set')

    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(training, training2)
    CSVUtil.create_csv_file(balanced, 'training.csv')
    logging.info('end preparing file of training set')
Example #4
def training_set(events_file: str, no_event_time_shift: int, table_name: str):
    logging.info('start')

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to the events
    filtered = FilterUtil.only_valid_events(d)

    # for Travis CI
    if ConnectionUtil.is_testable_system():
        filtered = filtered[:ConnectionUtil.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d' % len(filtered))

    # selector for the data
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        con, table_name, filtered, func, row_selector, interval_selector)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # generate the output files
    logging.info('start preparing file of training and testing set')
    random.seed(len(data) // 2)
    random.shuffle(data)

    CSVUtil.create_csv_file(data, 'data.csv')
    logging.info('end preparing file of training and testing set')

    logging.info('end')
Example #5
def main(events_file: str, no_event_time_shift: int):
    logging.info('start')

    table_name = 'measured_klarka'

    # download data
    con = ConnectionUtil.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(d))

    # apply filters to data
    filtered = FilterUtil.only_valid_events(d)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 17.5)
    # filtered = FilterUtil.temperature_diff(filtered, 17.5, 30)
    # filtered = FilterUtil.temperature_diff(filtered, 5, 13.3)
    # filtered = FilterUtil.temperature_diff(filtered, 13.3, 21.6)
    # filtered = FilterUtil.temperature_diff(filtered, 21.6, 30)
    # filtered = FilterUtil.temperature_diff(filtered, 10, 15)
    # filtered = FilterUtil.temperature_diff(filtered, 15, 20)
    # filtered = FilterUtil.temperature_diff(filtered, 20, 25)
    logging.info('events after applying the filter: %d' % len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleIntervalSelector(con, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        con, table_name, filtered, func, row_selector, interval_selector)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # split data set into training and testing set
    random.seed(len(data) // 2)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)

    logging.info('training set contains %d records, each %d times' %
                 (len(training), minimum))
    logging.info('testing set contains %d records' % len(testing))

    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 0,
                                        CenterLineSlope(), "trendline_", False,
                                        False, False, False)
    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 1,
                                        PolyfitLineAvgSlope(), "polyfit_",
                                        False, False, False, False)
    training_testing_data_with_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 2,
                                        CenterLineSlope(), "center_", False,
                                        False, False, False)
    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 3,
                                        CenterLineSlope(), "trendline_", False,
                                        False, False, False)
    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 4,
                                        PolyfitLineAvgSlope(), "polyfit_",
                                        False, False, False, False)
    training_testing_data_only_distance(copy.deepcopy(training),
                                        copy.deepcopy(testing), 5,
                                        CenterLineSlope(), "center_", False,
                                        False, False, False)

    training_testing_data_without_distance(copy.deepcopy(training),
                                           copy.deepcopy(testing), 6,
                                           CenterLineSlope(), "trendline_",
                                           False, False, False, False)

    logging.info('end')
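
A minimal sketch of how this script might be started when run as a standalone module; the events file name and the time shift are hypothetical placeholders, not values from the project.

if __name__ == '__main__':
    # Assumed values: replace with the actual events file and shift.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    main('events_klarka.json', 0)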