Example #1
def random_search_unsupervised(data_and_labels: tuple, model: Union[LocalOutlierFactor, IsolationForest],
                               params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for experiment in params['experiments']:
        model.set_params(**experiment['hyperparameters'])

        print(f'Model current hyperparameters are: {experiment["hyperparameters"]}.')

        if isinstance(model, LocalOutlierFactor):
            y_pred = model.fit_predict(x_test)  # return labels
        else:
            model.fit(x_train)
            y_pred = model.predict(x_test)  # return labels

        y_pred = convert_predictions(y_pred)
        metrics_report(y_test, y_pred)

        model_path = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, model_path)

        res = create_experiment_report(get_metrics(y_test, y_pred), experiment['hyperparameters'], file_path=model_path)
        scores.append(res)
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {
        'experiments': scores
    }
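
For context, the loop above assumes params carries a list of pre-sampled experiments and that convert_predictions maps scikit-learn's +1/-1 outlier labels onto the 0/1 targets used by the metrics. A minimal sketch, with hypothetical hyperparameter values (not from the source):

import numpy as np

params = {
    'experiments': [
        {'hyperparameters': {'n_neighbors': 20, 'contamination': 0.05}},  # illustrative values
        {'hyperparameters': {'n_neighbors': 50, 'contamination': 0.10}},
    ]
}

def convert_predictions(y_pred: np.ndarray) -> np.ndarray:
    # +1 (inlier) / -1 (outlier) -> 0 (normal) / 1 (anomaly)
    return (y_pred == -1).astype(int)
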
Example #2
def random_search(data_and_labels: tuple, model: TransformerAutoEncoder,
                  params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}

        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        # visualize_distribution_with_labels(y_pred, y_test, to_file=False)
        from sklearn.metrics import confusion_matrix
        print(confusion_matrix(y_test, y_pred))
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
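
find_optimal_threshold and classify are not shown in this snippet; a minimal sketch of what they presumably do, assuming the threshold theta is chosen to maximise F1 over the reconstruction errors (the helpers' exact implementations may differ):

import numpy as np
from sklearn.metrics import f1_score

def find_optimal_threshold(y_true: np.ndarray, errors: np.ndarray):
    # Sweep candidate thresholds over the observed error range and keep the best F1.
    best_theta, best_f1 = 0.0, 0.0
    for theta in np.linspace(errors.min(), errors.max(), num=200):
        f1 = f1_score(y_true, (errors > theta).astype(int), zero_division=0)
        if f1 > best_f1:
            best_theta, best_f1 = theta, f1
    return best_theta, best_f1

def classify(errors: np.ndarray, theta: float) -> np.ndarray:
    # Reconstruction errors above the threshold are flagged as anomalies (1).
    return (errors > theta).astype(int)
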
def grid_search(data_and_labels: tuple, model: Union[LocalOutlierFactor,
                                                     IsolationForest],
                params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in itertools.product(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}

        model.set_params(**kwargs)

        print(f'Model (hyper)parameters are: {model.get_params()}.')

        if isinstance(model, LocalOutlierFactor):
            y_pred = model.fit_predict(x_test)
        else:
            model.fit(x_train)
            y_pred = model.predict(x_test)

        y_pred = convert_predictions(y_pred)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred),
                                     model.get_params()))
    return {'experiments': scores}
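
Note how the two search loops expand params differently: random_search zips the value lists positionally (one trial per index, so the lists are typically pre-sampled to equal length), while grid_search takes the full Cartesian product via itertools.product. A small illustration with hypothetical IsolationForest parameter lists:

import itertools

params = {'n_estimators': [50, 100], 'max_samples': [0.5, 1.0]}

print(list(zip(*params.values())))
# [(50, 0.5), (100, 1.0)]                         -> 2 paired trials
print(list(itertools.product(*params.values())))
# [(50, 0.5), (50, 1.0), (100, 0.5), (100, 1.0)]  -> 4 grid configurations
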
Example #4
def random_search(data_and_labels: tuple, model: Union[AutoEncoder, VanillaTCN, AETCN, AECNN1D, CNN1D, CNN2D, TCNCNN1D,
                                                       SACNN1D, SACNN2D], params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for experiment in params['experiments']:
        model.set_params(**experiment['hyperparameters'])

        print(f'Model current hyperparameters are: {experiment["hyperparameters"]}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        model_path = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, model_path)

        res = create_experiment_report(get_metrics(y_test, y_pred), experiment['hyperparameters'], theta, model_path)
        scores.append(res)
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {
        'experiments': scores
    }
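
create_experiment_report itself is not shown, but the evaluation helpers below read model_path and threshold back from the selected entry, so each report presumably bundles the metrics, the hyperparameters, the chosen threshold and the saved model path. A hypothetical entry (field names and values are illustrative only):

{
    'metrics': {'f1_score': 0.87, 'precision': 0.91, 'recall': 0.83},
    'hyperparameters': {'learning_rate': 0.001, 'epochs': 10},
    'threshold': 0.0033,
    'model_path': '../../models/aetcn/4f5f4682-1ca5-400a-a340-6243716690c0.pt'
}
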
def evaluate_unsupervised(x_test: np.ndarray, y_test: np.ndarray,
                          experiments: Dict) -> Dict:
    model_config = find_best_model(experiments)

    model = torch.load(model_config['model_path'])

    if isinstance(model, LocalOutlierFactor):
        y_pred = model.fit_predict(x_test)  # return labels
    else:
        y_pred = model.predict(x_test)  # return labels

    y_pred = convert_predictions(y_pred)
    auc_score = roc_auc_score(y_test, y_pred)
    metrics_report(y_test, y_pred)
    return create_report(
        model_config, {
            **get_metrics(y_test, y_pred), 'auc_score': float(auc_score)
        })
def evaluate(x_test: np.ndarray, y_test: np.ndarray, experiments: Dict) -> Dict:
    model_config = find_best_model(experiments)

    model = torch.load(model_config['model_path'])
    theta = model_config['threshold']

    y_pred = model.predict(x_test)  # return reconstruction errors

    np.savez('preds', y_pred=y_pred, y_test=y_test)
    auc_score = roc_auc_score(y_test, y_pred)

    y_pred = classify(y_pred, theta)
    metrics_report(y_test, y_pred)
    # print('# trainable params:', sum(p.numel() for p in model._model.parameters() if p.requires_grad), ',# params:', sum(p.numel() for p in model._model.parameters()))
    return create_report(
        model_config, {
            **get_metrics(y_test, y_pred), 'auc_score': float(auc_score)
        })
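
A minimal usage sketch, assuming the checkpoint written by create_checkpoint is a JSON file at EXPERIMENT_PATH (the .json path used in train_window below suggests this):

import json

with open(EXPERIMENT_PATH) as f:
    experiments = json.load(f)

report = evaluate(x_test, y_test, experiments)
print(report)
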
def get_extracted_features(x_train: List, x_val: List, x_test: List,
                           y_val: np.ndarray):
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_val = sc.transform(x_val)
    x_test = sc.transform(x_test)

    model = torch.load(
        '../../models/aetcn/5d9ad591-6d3c-428f-894f-02af96ca1930.pt')

    y_pred = model.predict(x_val)  # return reconstruction errors
    train_features = model.extract_features(x_train).astype(dtype=np.float32)
    val_features = model.extract_features(x_val).astype(dtype=np.float32)
    test_features = model.extract_features(x_test).astype(dtype=np.float32)

    theta, f1 = find_optimal_threshold(y_val, y_pred)
    y_pred = classify(y_pred, theta)
    metrics_report(y_val, y_pred)
    return train_features, val_features, test_features
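
A short usage sketch for the helper above; the second dimension of the returned matrices depends on the bottleneck size of the loaded AETCN model (shapes shown are assumptions):

train_f, val_f, test_f = get_extracted_features(x_train, x_val, x_test, y_val)
print(train_f.shape, val_f.shape, test_f.shape)  # e.g. (n_train, d), (n_val, d), (n_test, d)
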
def random_search(data_and_labels: tuple, model: Union[AutoEncoder,
                                                       IsolationForest],
                  params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}

        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
Example #9
def train_window(x_train: List, x_test: List, y_train: np.ndarray,
                 y_test: np.ndarray) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    scores = []
    for w in range(1, 50, 2):
        print('Window:', w)
        model = VanillaTCN(epochs=1, window=w)

        model.fit(x_train[y_train == 0])
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred),
                                     {'window': w}))
        create_checkpoint(
            {'experiments': scores},
            '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': scores}
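
Once the sweep finishes, the best window can be read back from the collected reports; assuming each report stores its metrics under a 'metrics' key and the swept value under 'hyperparameters' (the key names are an assumption here), something like:

best = max(scores, key=lambda r: r['metrics']['f1_score'])
print('Best window:', best['hyperparameters']['window'],
      'F1:', best['metrics']['f1_score'])
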
    "model_path": "../../models/aetcn/4f5f4682-1ca5-400a-a340-6243716690c0.pt",
    "threshold": 0.00331703620031476
}

X = load_pickle_file(
    '../../data/processed/HDFS1/X-val-HDFS1-cv1-1-block.npy')[:1000]
y = np.load('../../data/processed/HDFS1/y-val-HDFS1-cv1-1-block.npy')[:1000]

# X is a list of matrices, one per block id (blk_id); each is a NumPy array of shape (n_logs x 100)

# F1 = (2 * r * p) / (r + p), where r = recall and p = precision

n_examples = 700

sc = CustomMinMaxScaler()  # range 0 -- 1
x_train = sc.fit_transform(X[:n_examples])
y_train = y[:n_examples]
x_test = sc.transform(X[n_examples:])
y_test = y[n_examples:]

model = AETCN()
model.set_params(**config['hyperparameters'])

model.fit(x_train[y_train == 0])  # 0 -> normal, 1 -> anomaly
y_pred = model.predict(x_test)  # return reconstruction errors

theta, f1 = find_optimal_threshold(y_test, y_pred)
y_pred = classify(y_pred, theta)
metrics_report(y_test, y_pred)
print(confusion_matrix(y_test, y_pred))
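
For reference, scikit-learn's confusion_matrix puts true labels on the rows and predicted labels on the columns, so with the 0 = normal / 1 = anomaly convention used above the printed matrix reads:

# [[TN, FP],
#  [FN, TP]]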