        if dataset == 'BGL':
            data_instances = config.BGL_data

            (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = load_BGL(data_instances, 0.35, 0.6)
            collector = Collector(result_folder, (1, 1, 1, 1), False, config.BGL_col_header, 100)

        elif dataset == 'HDFS':
            data_instances = config.HDFS_data
            (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = dataloader.load_HDFS(data_instances,
                                                                                                  train_ratio=0.35,
                                                                                                  is_data_instance=True,
                                                                                                  test_ratio=0.6)
            collector = Collector(result_folder, (1, 1, 1, 1), False, config.HDFS_col_header, 100)
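        # both loaders split the instances 35% train / 60% test, leaving the
        # remaining ~5% for validation (assumption: the two ratios are
        # fractions of the whole dataset)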

        assert FLAGS.h < FLAGS.plb
        lstm_preprocessor = preprocessing.LstmPreprocessor(x_train, x_test, x_validate)
        sym_count = len(lstm_preprocessor.vectors) - 1
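        # the -1 presumably excludes one reserved entry (e.g. the padding
        # symbol) from the symbol count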
        print('Total symbols: %d' % sym_count)
        print(lstm_preprocessor.syms)

        # pad any sequence shorter than FLAGS.plb up to that length
        x_train = [lstm_preprocessor.pad(t, FLAGS.plb) if len(t) < FLAGS.plb else t for t in x_train]

        # drop anomalous sequences and, when FLAGS.no_repeat_series is set,
        # repeated event series from x_train
        x_train = lstm_preprocessor.process_train_inputs(x_train, y_train, FLAGS.h, True,
                                                         FLAGS.no_repeat_series)
        x_train = lstm_preprocessor.transform_to_same_length(x_train, FLAGS.h)

        model = lstm_attention.LSTMAttention(FLAGS.g, FLAGS.h, FLAGS.L, FLAGS.alpha, FLAGS.batch_size, sym_count).model
        # checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_name,
        #                                              verbose=1, save_weights_only=True)
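        # A hypothetical continuation (not part of the original snippet): if
        # the checkpoint above were re-enabled, training could proceed along
        # the lines of Example #2 below; applying gen_input_and_label to
        # x_train and the FLAGS.epochs flag are assumptions here.
        # inputs, labels = lstm_preprocessor.gen_input_and_label(x_train)
        # model.fit(inputs, labels, batch_size=FLAGS.batch_size,
        #           epochs=FLAGS.epochs, callbacks=[checkpoint])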
Example #2
        result.append(t)
    return result


if __name__ == '__main__':
    assert FLAGS.h < FLAGS.plb

    config.init('testbed')
    checkpoint_name = config.path + FLAGS.checkpoint_name
    top_counts_file_name = config.path + 'top_counts.pkl'

    file = config.testbed_path + 'logstash-2019.07.22_ts-food-service_sorted.csv_structured.csv'
    df = pd.read_csv(file)
    event_sequence = [list(df['EventId'].values)]
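    # a single "session": one list holding the full ordered EventId sequence
    # parsed from the structured log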

    lstm_preprocessor = preprocessing.LstmPreprocessor(event_sequence)
    sym_count = len(lstm_preprocessor.vectors) - 1
    print('Total symbols: %d' % sym_count)
    print(lstm_preprocessor.syms)

    model = lstm_attention_count_vector.LSTMAttention(3, FLAGS.h, FLAGS.L, FLAGS.alpha, FLAGS.batch_size,
                                                      sym_count).model
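    # note: the first LSTMAttention argument (apparently g) is hardcoded to 3
    # here, whereas Example #1 passes FLAGS.g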
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_name, verbose=1, save_weights_only=True)
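    # save_weights_only=True stores just the weights, so the architecture has
    # to be rebuilt (as above) before load_weights() can restore them (below)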

    if os.path.exists(checkpoint_name):
        print('== Reading model parameters from %s ==' % checkpoint_name)
        model.load_weights(checkpoint_name)

    inputs, labels = lstm_preprocessor.gen_input_and_label(event_sequence)
    count_vectors = lstm_preprocessor.gen_count_vectors(event_sequence)
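    # A plausible continuation (hypothetical; the actual call depends on how
    # LSTMAttention consumes its inputs): fit on the symbol windows plus their
    # per-symbol count vectors, checkpointing weights each epoch. FLAGS.epochs
    # is an assumed flag.
    model.fit([inputs, count_vectors], labels,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epochs,
              callbacks=[checkpoint])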