from sklearn.metrics import confusion_matrix


def random_search(data_and_labels: tuple, model: TransformerAutoEncoder, params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}
        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        # visualize_distribution_with_labels(y_pred, y_test, to_file=False)
        print(confusion_matrix(y_test, y_pred))
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
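
# Usage sketch for the zip-based search above: `params` maps each hyperparameter
# name to a list of pre-sampled values, and zip(*params.values()) walks those
# lists in parallel, one configuration per index. All names and values below are
# hypothetical illustrations, not the project's actual search space.
params = {
    'learning_rate': [1e-3, 3e-4, 1e-4],
    'batch_size': [32, 64, 128],
    'dropout': [0.1, 0.3, 0.5]
}
# e.g. the second configuration is {'learning_rate': 3e-4, 'batch_size': 64, 'dropout': 0.3}
results = random_search((x_train, x_test, None, y_test), TransformerAutoEncoder(), params)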
def random_search(data_and_labels: tuple,
                  model: Union[AutoEncoder, VanillaTCN, AETCN, AECNN1D, CNN1D, CNN2D,
                               TCNCNN1D, SACNN1D, SACNN2D],
                  params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for experiment in params['experiments']:
        model.set_params(**experiment['hyperparameters'])

        print(f'Model current hyperparameters are: {experiment["hyperparameters"]}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        model_path = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, model_path)

        res = create_experiment_report(get_metrics(y_test, y_pred), experiment['hyperparameters'],
                                       theta, model_path)
        scores.append(res)
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
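
# `create_model_path` is a project helper not shown here. A minimal sketch that is
# consistent with how it is called above (experiments directory + random UUID stem);
# the '.pt' suffix is an assumption based on the checkpoint paths used elsewhere.
import os


def create_model_path(dir_to_experiments: str, file_name: str) -> str:
    # e.g. ('../../models/aetcn', '4f5f4682-...') -> '../../models/aetcn/4f5f4682-....pt'
    return os.path.join(dir_to_experiments, f'{file_name}.pt')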
def evaluate(x_test: np.ndarray, y_test: np.array, experiments: Dict) -> Dict:
    model_config = find_best_model(experiments)

    model = torch.load(model_config['model_path'])
    theta = model_config['threshold']

    y_pred = model.predict(x_test)  # return reconstruction errors
    np.savez('preds', y_pred=y_pred, y_test=y_test)
    auc_score = roc_auc_score(y_test, y_pred)

    y_pred = classify(y_pred, theta)
    metrics_report(y_test, y_pred)
    # print('# trainable params:', sum(p.numel() for p in model._model.parameters() if p.requires_grad),
    #       ', # params:', sum(p.numel() for p in model._model.parameters()))
    return create_report(model_config, {**get_metrics(y_test, y_pred), 'auc_score': float(auc_score)})
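
# `find_best_model` presumably scans the saved experiment reports and returns the
# configuration that scored best on the validation split. A sketch under the
# assumption that each report keeps its scores under a 'metrics' key with an 'f1'
# entry; the actual report layout may differ.
from typing import Dict


def find_best_model(experiments: Dict) -> Dict:
    return max(experiments['experiments'], key=lambda report: report['metrics']['f1'])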
def get_extracted_features(x_train: List, x_val: List, x_test: List, y_val: np.array):
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_val = sc.transform(x_val)
    x_test = sc.transform(x_test)

    model = torch.load('../../models/aetcn/5d9ad591-6d3c-428f-894f-02af96ca1930.pt')

    y_pred = model.predict(x_val)  # return reconstruction errors
    train_features = model.extract_features(x_train).astype(dtype=np.float32)
    val_features = model.extract_features(x_val).astype(dtype=np.float32)
    test_features = model.extract_features(x_test).astype(dtype=np.float32)

    theta, f1 = find_optimal_threshold(y_val, y_pred)
    y_pred = classify(y_pred, theta)
    metrics_report(y_val, y_pred)
    return train_features, val_features, test_features
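
# `CustomMinMaxScaler` is a project class. A minimal sketch of the assumed behaviour:
# one global min/max is learned on the training split and every (n_logs x 100)
# matrix is mapped into 0 -- 1 with it. Returning an object array is an assumption
# that makes boolean masks such as x_train[y_train == 0] (used below) work on
# variable-length inputs.
from typing import List
import numpy as np


class CustomMinMaxScaler:
    def fit_transform(self, X: List) -> np.ndarray:
        self._min = min(float(x.min()) for x in X)
        self._max = max(float(x.max()) for x in X)
        return self.transform(X)

    def transform(self, X: List) -> np.ndarray:
        scaled = [(x - self._min) / (self._max - self._min) for x in X]
        out = np.empty(len(scaled), dtype=object)  # 1-D container of 2-D matrices
        out[:] = scaled
        return out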
def random_search(data_and_labels: tuple, model: Union[AutoEncoder, IsolationForest], params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}
        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
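
# `find_optimal_threshold` and `classify` are shared helpers used throughout these
# searches. A plausible sketch of what they do, assuming `classify` flags errors
# above the threshold as anomalies and the optimal theta maximises F1 over the
# observed reconstruction errors; the project's actual implementation may differ.
import numpy as np
from sklearn.metrics import f1_score


def classify(y_pred: np.array, theta: float) -> np.array:
    # reconstruction error above the threshold -> anomaly (1), otherwise normal (0)
    return (np.asarray(y_pred) > theta).astype(np.int32)


def find_optimal_threshold(y_true: np.array, y_pred: np.array):
    candidates = np.unique(y_pred)  # every distinct error is a candidate threshold
    f1_scores = [f1_score(y_true, classify(y_pred, theta)) for theta in candidates]
    best = int(np.argmax(f1_scores))
    return float(candidates[best]), float(f1_scores[best])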
def train_window(x_train: List, x_test: List, y_train: np.array, y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    scores = []
    for w in range(1, 50, 2):
        print('Window:', w)

        model = VanillaTCN(epochs=1, window=w)

        model.fit(x_train[y_train == 0])
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)
        scores.append(create_experiment_report(get_metrics(y_test, y_pred), {'window': w}))
        create_checkpoint({'experiments': scores}, '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': scores}
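
# `create_checkpoint` persists the partial results after every configuration, so an
# interrupted sweep keeps everything finished so far. A minimal sketch assuming a
# plain JSON dump to the given path:
import json
from typing import Dict


def create_checkpoint(data: Dict, file_path: str) -> None:
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)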
"model_path": "../../models/aetcn/4f5f4682-1ca5-400a-a340-6243716690c0.pt", "threshold": 0.00331703620031476 } X = load_pickle_file( '../../data/processed/HDFS1/X-val-HDFS1-cv1-1-block.npy')[:1000] y = np.load('../../data/processed/HDFS1/y-val-HDFS1-cv1-1-block.npy')[:1000] # list of matrices, a matrix == blk_id -> NumPy [[005515, ...], ...] (n_logs x 100) # F1 = (2 * r * p) / (r + p) n_examples = 700 sc = CustomMinMaxScaler() # range 0 -- 1 x_train = sc.fit_transform(X[:n_examples]) y_train = y[:n_examples] x_test = sc.transform(X[n_examples:]) y_test = y[n_examples:] model = AETCN() model.set_params(**config['hyperparameters']) model.fit(x_train[y_train == 0]) # 0 -> normal, 1 -> anomaly y_pred = model.predict(x_test) # return reconstruction errors theta, f1 = find_optimal_threshold(y_test, y_pred) y_pred = classify(y_pred, theta) metrics_report(y_test, y_pred) confusion_matrix(y_test, y_pred)