# Example #1 (fragment marker from the source dataset)
        # NOTE(review): tail of a window-size sweep — the enclosing `def` and
        # loop header are outside this chunk; `w`, `model`, `scores`, `x_test`
        # and `y_test` are presumably bound there.
        y_pred = model.predict(x_test)  # return reconstruction errors

        # Choose the decision threshold that maximizes F1 on this split.
        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)  # reconstruction errors -> 0/1 labels
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred),
                                     {'window': w}))
        # Checkpoint after every iteration so an interrupted sweep loses at
        # most the current run.
        create_checkpoint(
            {'experiments': scores},
            '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': scores}


if __name__ == '__main__':
    # Load the pre-split HDFS1 cross-validation fold 1 (block-level data):
    # features come from pickles, labels from NumPy .npy arrays.
    X_train = load_pickle_file(
        '../../data/processed/HDFS1/X-train-HDFS1-cv1-1-block.pickle')
    X_val = load_pickle_file(
        '../../data/processed/HDFS1/X-val-HDFS1-cv1-1-block.pickle')
    y_train = np.load(
        '../../data/processed/HDFS1/y-train-HDFS1-cv1-1-block.npy')
    y_val = np.load('../../data/processed/HDFS1/y-val-HDFS1-cv1-1-block.npy')

    # NOTE(review): every training entry point below is commented out, so as
    # written this script only loads the data and exits. Uncomment exactly one
    # of the pairs to run an experiment and persist its report.
    # results = train_window(X_train, X_val, y_train, y_val)
    # save_experiment(results, '../../models/TCN-cropped-window-embeddings-HDFS1.json')

    # results = train_tcnn(X_train, X_val, y_train, y_val)
    # save_experiment(results, EXPERIMENT_PATH)

    # results = train_cnn1d(X_train, X_val, y_train, y_val)
    # save_experiment(results, EXPERIMENT_PATH)
    # NOTE(review): tail of an evaluation helper — the enclosing `def` (which
    # binds `model`, `model_config`, `x_test`, `y_test`) is outside this chunk.
    # LocalOutlierFactor is queried via fit_predict; presumably it does not
    # expose a plain predict() in the configuration used here — confirm.
    if isinstance(model, LocalOutlierFactor):
        y_pred = model.fit_predict(x_test)  # return labels
    else:
        y_pred = model.predict(x_test)  # return labels

    # Normalize model-specific label encodings to the dataset's convention.
    y_pred = convert_predictions(y_pred)
    auc_score = roc_auc_score(y_test, y_pred)
    metrics_report(y_test, y_pred)
    # Report combines the standard metrics with ROC-AUC (cast to a plain
    # float so the report stays JSON-serializable).
    return create_report(
        model_config, {
            **get_metrics(y_test, y_pred), 'auc_score': float(auc_score)
        })


if __name__ == '__main__':
    # Evaluate each trained architecture on the fixed HDFS1 test split and
    # collect their metric reports keyed by model name.
    X_train = load_pickle_file(
        '../../data/processed/HDFS1/X-train-HDFS1-cv1-1-block.pickle')
    X_test = load_pickle_file(
        '../../data/processed/HDFS1/X-test-HDFS1-block.pickle')
    y_test = np.load('../../data/processed/HDFS1/y-test-HDFS1-block.npy')

    experiment_reports = {}

    results = evaluate_tcnn(X_train, X_test, y_test)
    experiment_reports['TCN model'] = results
    print('TCN model:', json.dumps(results, indent=4, sort_keys=True))

    results = evaluate_cnn1d(X_train, X_test, y_test)
    experiment_reports['CNN1D model'] = results
    print('CNN1D model:', json.dumps(results, indent=4, sort_keys=True))

    # NOTE(review): this fragment is cut off here; the handling of the CNN2D
    # result (and any further models) continues outside this chunk.
    results = evaluate_cnn2d(X_train, X_test, y_test)
# Example #3 (fragment marker from the source dataset)
        "layers": [
            [142],
            1246,
            [
                100  # output_shape == input_shape
            ]
        ],
        "learning_rate": 0.0016378937069540646,
        "window": 45
    },
    # not used currently
    "model_path": "../../models/aetcn/4f5f4682-1ca5-400a-a340-6243716690c0.pt",
    "threshold": 0.00331703620031476
}

# Load a 1000-example slice of the HDFS1 validation fold for a quick run.
# FIX: X is read through load_pickle_file, so the path must point at the
# `.pickle` artifact (this same file is loaded as `...cv1-1-block.pickle`
# elsewhere in this project); the previous path used the `.npy` extension,
# which is the naming reserved for the np.load label arrays below.
X = load_pickle_file(
    '../../data/processed/HDFS1/X-val-HDFS1-cv1-1-block.pickle')[:1000]
y = np.load('../../data/processed/HDFS1/y-val-HDFS1-cv1-1-block.npy')[:1000]

# list of matrices, a matrix == blk_id -> NumPy [[005515, ...], ...] (n_logs x 100)

# F1 = (2 * r * p) / (r + p)

# First 700 examples form the train split; the remainder is held out.
n_examples = 700

# Scale features to a common range, fitting on the train split only so no
# test-set statistics leak into the transform.
sc = CustomMinMaxScaler()  # range 0 -- 1
x_train = sc.fit_transform(X[:n_examples])
y_train = y[:n_examples]
x_test = sc.transform(X[n_examples:])
y_test = y[n_examples:]

model = AETCN()
    # NOTE(review): tail of a hyper-parameter search — the enclosing `def`
    # (binding `params`, `scores`, the train/test splits and EXPERIMENT_PATH)
    # is outside this chunk. `params` maps name -> list of candidate values;
    # zip(*values) pairs them positionally, so this iterates aligned
    # configurations rather than the full Cartesian product.
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}

        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        # Threshold chosen to maximize F1, then errors become 0/1 labels.
        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        # Checkpoint after each configuration so partial results survive.
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}


if __name__ == '__main__':
    # Train the autoencoder on Drain3-parsed HDFS1 logs (CV fold 1): data
    # comes from binlog pickles, labels from the matching CSV files.
    X_train = load_pickle_file(
        '../../data/interim/HDFS1/train-data-Drain3-HDFS1-cv1-1.binlog')
    y_train = load_labels(
        '../../data/interim/HDFS1/train-labels-HDFS1-cv1-1.csv')
    X_val = load_pickle_file(
        '../../data/interim/HDFS1/val-data-Drain3-HDFS1-cv1-1.binlog')
    y_val = load_labels('../../data/interim/HDFS1/val-labels-HDFS1-cv1-1.csv')

    # Persist the full experiment report alongside the model artifacts.
    results = train_autoencoder(X_train, X_val, y_train, y_val)
    save_experiment(results, EXPERIMENT_PATH)