def evaluate_sa_cnn2d(x_train: List, x_test: List, y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_test = sc.fit(x_train).transform(x_test)  # fit the scaler on train data only, then scale the test split

    training_stats = load_experiment('../../models/sa_cnn2d/experiments.json')
    score = evaluate(x_test, y_test, training_stats['experiments'])
    return score
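
# A minimal sketch of `CustomMinMaxScaler` (its definition is not shown in
# these examples). Assumption: it mirrors sklearn's MinMaxScaler, but over a
# list of variable-length matrices -- `fit` collects global per-feature
# minima/maxima across all training matrices and `transform` rescales every
# matrix to [0, 1] (cf. the "range 0 -- 1" comment in Example #7). The usual
# `import numpy as np` / `from typing import List` are assumed.
class CustomMinMaxScaler:
    def fit(self, x: List) -> 'CustomMinMaxScaler':
        stacked = np.vstack(x)              # (total_n_logs, embeddings_dim)
        self.x_min = stacked.min(axis=0)
        self.x_max = stacked.max(axis=0)
        return self                         # enables sc.fit(x_train).transform(x_test)

    def transform(self, x: List) -> np.array:
        # guard constant features against division by zero
        denom = np.where(self.x_max > self.x_min, self.x_max - self.x_min, 1.0)
        out = np.empty(len(x), dtype=object)  # object array -> supports x[y == 0] masking
        for i, m in enumerate(x):
            out[i] = (m - self.x_min) / denom
        return out

    def fit_transform(self, x: List) -> np.array:
        return self.fit(x).transform(x)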
Example #2
def train_aetcnn(x_train: List, x_test: List, y_train: np.array,
                 y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    model = AETCN()
    n_experiments = 100
    embeddings_dim = x_train[0].shape[1]

    params = {
        'epochs': np.random.choice(np.arange(1, 10), size=n_experiments).tolist(),
        'learning_rate': np.random.choice(10**np.linspace(-4, -0.5),
                                          size=n_experiments).tolist(),
        'batch_size': np.random.choice([2**i for i in range(3, 8)],
                                       size=n_experiments).tolist(),
        'input_shape': [embeddings_dim] * n_experiments,
        'layers': generate_layer_settings(embeddings_dim, n_experiments),
        'kernel_size': np.random.choice([2 * i + 1 for i in range(1, 6)],
                                        size=n_experiments).tolist(),
        'window': np.random.randint(10, 100, size=n_experiments).tolist(),
        'dropout': np.random.uniform(0, 0.5, size=n_experiments).tolist()
    }
    evaluated_hyperparams = random_search(
        (x_train[y_train == 0], x_test, None, y_test), model, params)
    return evaluated_hyperparams
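
# A hedged sketch of `random_search`, inferred from the call above and from the
# evaluation loop in Example #6: each entry of `params` holds one sampled value
# per experiment, the model is re-configured and re-fitted on the normal
# (y == 0) training data for every draw, and each run is scored on the test set
# with the optimal-F1 threshold. The helpers it calls are the same ones used
# elsewhere in these examples.
def random_search(data_and_labels: tuple, model, params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels
    n_experiments = len(params['epochs'])
    scores = []
    for i in range(n_experiments):
        kwargs = {key: values[i] for key, values in params.items()}
        model.set_params(**kwargs)

        model.fit(x_train)              # only normal samples are passed in
        y_pred = model.predict(x_test)  # reconstruction errors

        theta, _ = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        scores.append(create_experiment_report(get_metrics(y_test, y_pred), kwargs))
    return {'experiments': scores}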
Example #3
def get_extracted_features(x_train: List, x_val: List, x_test: List,
                           y_val: np.array):
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_val = sc.transform(x_val)
    x_test = sc.transform(x_test)

    model = torch.load(
        '../../models/aetcn/5d9ad591-6d3c-428f-894f-02af96ca1930.pt')

    y_pred = model.predict(x_val)  # return reconstruction errors
    train_features = model.extract_features(x_train).astype(dtype=np.float32)
    val_features = model.extract_features(x_val).astype(dtype=np.float32)
    test_features = model.extract_features(x_test).astype(dtype=np.float32)

    theta, f1 = find_optimal_threshold(y_val, y_pred)  # tune the threshold on the validation split
    y_pred = classify(y_pred, theta)
    metrics_report(y_val, y_pred)
    return train_features, val_features, test_features
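
# A possible implementation of the thresholding helpers used throughout these
# examples (their definitions are not shown). Assumption: `find_optimal_threshold`
# sweeps candidate thresholds over the predicted reconstruction errors and keeps
# the one maximizing F1 against the ground truth; `classify` binarizes the
# errors (error > theta -> anomaly, i.e. label 1).
from sklearn.metrics import f1_score

def find_optimal_threshold(y_true: np.array, y_pred: np.array):
    best_theta, best_f1 = 0.0, 0.0
    for theta in np.unique(y_pred):     # every observed error is a candidate
        score = f1_score(y_true, (y_pred > theta).astype(np.int8))
        if score > best_f1:
            best_theta, best_f1 = theta, score
    return best_theta, best_f1

def classify(y_pred: np.array, theta: float) -> np.array:
    return (y_pred > theta).astype(np.int8)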
Example #4
def train_sa_cnn1d(x_train: List, x_test: List, y_train: np.array,
                   y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    model = SACNN1D()
    n_experiments = 100
    embeddings_dim = x_train[0].shape[1]

    encoder_kernel_sizes = np.random.choice([2 * i + 1 for i in range(1, 4)],
                                            size=n_experiments).tolist()
    layers = generate_layer_settings(embeddings_dim, n_experiments)
    params = {
        'epochs': np.random.choice(np.arange(1, 10), size=n_experiments).tolist(),
        'learning_rate': np.random.choice(10**np.linspace(-4, -0.5),
                                          size=n_experiments).tolist(),
        'batch_size': np.random.choice([2**i for i in range(3, 8)],
                                       size=n_experiments).tolist(),
        'input_shape': [embeddings_dim] * n_experiments,
        'layers': layers,
        'encoder_kernel_size': encoder_kernel_sizes,
        'decoder_kernel_size': np.random.choice([2 * i + 1 for i in range(2, 7)],
                                                size=n_experiments).tolist(),
        'encoder_heads': get_encoder_heads(layers),
        'decoder_heads': get_decoder_heads(layers),
        'window': get_1d_window_size(encoder_kernel_sizes, layers, get_encoder_size),
        'dropout': np.random.uniform(0, 0.3, size=n_experiments).tolist()
    }
    evaluated_hyperparams = random_search(
        (x_train[y_train == 0], x_test, None, y_test), model, params)
    return evaluated_hyperparams
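
# `get_encoder_heads`/`get_decoder_heads` are not shown in these examples. A
# heavily hedged sketch under one explicit assumption, by analogy with the
# transformer example below: a self-attention head count must divide the
# dimension it attends over, so a valid divisor is sampled per layer setting.
# The `setting['encoder'][-1]` accessor (last encoder channel dimension) is
# hypothetical; `get_decoder_heads` would be analogous.
def pick_heads(dim: int) -> int:
    divisors = [d for d in range(1, dim + 1) if dim % d == 0]
    return int(np.random.choice(divisors))

def get_encoder_heads(layers: List) -> List:
    return [pick_heads(setting['encoder'][-1]) for setting in layers]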
Example #5
def train_transformer(x_train: List, x_test: List, y_train: np.array,
                      y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    model = TransformerAutoEncoder()
    n_experiments = 100
    embeddings_dim = x_train[0].shape[1]

    divisors = get_all_divisors(embeddings_dim)
    params = {
        'epochs': np.random.choice(np.arange(1, 5), size=n_experiments).tolist(),
        'learning_rate': np.random.choice(10**np.linspace(-4, -0.5),
                                          size=n_experiments).tolist(),
        'batch_size': np.random.choice([2**i for i in range(3, 8)],
                                       size=n_experiments).tolist(),
        'input_dim': [embeddings_dim] * n_experiments,
        'heads': np.random.choice(divisors, size=n_experiments,
                                  p=get_normal_dist(divisors)).tolist(),
        'n_encoders': np.random.randint(1, 5, size=n_experiments).tolist(),
        'n_decoders': np.random.randint(1, 5, size=n_experiments).tolist(),
        'dim_feedforward': np.random.randint(100, 2000, size=n_experiments).tolist(),
        'window': np.random.randint(10, 100, size=n_experiments).tolist(),
        'dropout': np.random.uniform(0, 0.5, size=n_experiments).tolist()
    }
    evaluated_hyperparams = random_search(
        (x_train[y_train == 0], x_test, None, y_test), model, params)
    return evaluated_hyperparams
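
# Why divisors? Multi-head attention requires the embedding dimension to be
# divisible by the number of heads, so `heads` is sampled from the divisors of
# `embeddings_dim`. Minimal sketches of the two helpers follow; `get_normal_dist`
# is assumed to return sampling weights favouring mid-sized divisors (a Gaussian
# over the divisor indices is one plausible choice, not the confirmed one).
def get_all_divisors(n: int) -> List:
    return [d for d in range(1, n + 1) if n % d == 0]

def get_normal_dist(divisors: List) -> np.array:
    idx = np.arange(len(divisors), dtype=np.float64)
    weights = np.exp(-0.5 * ((idx - idx.mean()) / (idx.std() + 1e-8))**2)
    return weights / weights.sum()  # probabilities must sum to 1 for np.random.choice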
Example #6
def train_window(x_train: List, x_test: List, y_train: np.array,
                 y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    scores = []
    for w in range(1, 50, 2):
        print('Window:', w)
        model = VanillaTCN(epochs=1, window=w)

        model.fit(x_train[y_train == 0])
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred),
                                     {'window': w}))
        create_checkpoint(
            {'experiments': scores},
            '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': scores}
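
# Hedged sketches of the two reporting helpers used above: a report is assumed
# to bundle the metrics with the hyperparameters that produced them, and
# `create_checkpoint` to dump the partial results as JSON after every window
# setting so an interrupted sweep still leaves usable output.
import json

def create_experiment_report(metrics: Dict, hyperparameters: Dict) -> Dict:
    return {'metrics': metrics, 'hyperparameters': hyperparameters}

def create_checkpoint(results: Dict, file_path: str) -> None:
    with open(file_path, 'w') as f:
        json.dump(results, f, indent=4)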
Example #7
config = {
    # 'hyperparameters' key omitted here; it is consumed below via
    # model.set_params(**config['hyperparameters'])
    # not used currently
    "model_path": "../../models/aetcn/4f5f4682-1ca5-400a-a340-6243716690c0.pt",
    "threshold": 0.00331703620031476
}

X = load_pickle_file(
    '../../data/processed/HDFS1/X-val-HDFS1-cv1-1-block.npy')[:1000]
y = np.load('../../data/processed/HDFS1/y-val-HDFS1-cv1-1-block.npy')[:1000]

# X is a list of matrices: one matrix per blk_id, a NumPy array of embedding values of shape (n_logs x 100)

# F1 = (2 * p * r) / (p + r), where p = precision and r = recall

n_examples = 700  # first 700 blocks for training, the remaining 300 for evaluation

sc = CustomMinMaxScaler()  # range 0 -- 1
x_train = sc.fit_transform(X[:n_examples])
y_train = y[:n_examples]
x_test = sc.transform(X[n_examples:])
y_test = y[n_examples:]

model = AETCN()
model.set_params(**config['hyperparameters'])

model.fit(x_train[y_train == 0])  # 0 -> normal, 1 -> anomaly
y_pred = model.predict(x_test)  # return reconstruction errors

theta, f1 = find_optimal_threshold(y_test, y_pred)
y_pred = classify(y_pred, theta)
metrics_report(y_test, y_pred)
confusion_matrix(y_test, y_pred)
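
# Hedged sketches of the evaluation helpers used throughout (assumed to be thin
# wrappers around sklearn, consistent with the F1 formula noted above).
from sklearn.metrics import precision_score, recall_score, f1_score

def get_metrics(y_true: np.array, y_pred: np.array) -> Dict:
    return {
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1_score': f1_score(y_true, y_pred)
    }

def metrics_report(y_true: np.array, y_pred: np.array) -> None:
    print('Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1_score:.4f}'
          .format(**get_metrics(y_true, y_pred)))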