def train_autoencoder(x_train: Dict, x_test: Dict, y_train: np.ndarray, y_test: np.ndarray) -> Dict:
    """Run a random hyperparameter search for the AutoEncoder model.

    Extracts TF-IDF features (mean-pooled) from the raw log dicts, converts
    the label inputs via ``get_labels_from_csv``, then samples 100 random
    hyperparameter configurations and evaluates them with ``random_search``.

    Args:
        x_train: mapping of block id -> raw log data (training split).
        x_test: mapping of block id -> raw log data (test split).
        y_train: raw labels; resolved against ``x_train.keys()``.
        y_test: raw labels; resolved against ``x_test.keys()``.

    Returns:
        Dict of evaluated hyperparameter experiments from ``random_search``.
    """
    # NOTE(review): a second `train_autoencoder` defined later in this module
    # shadows this one at import time — confirm which version is intended.
    fe = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_train = get_labels_from_csv(y_train, x_train.keys())
    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = fe.fit_transform(x_train)
    x_test = fe.transform(x_test)

    model = AutoEncoder()
    n_experiments = 100
    params = {
        'epochs': np.random.choice(np.arange(1, 10), size=n_experiments).tolist(),
        # candidate learning rates drawn from a log-spaced grid in [1e-4, 10**-0.1]
        'learning_rate': np.random.choice(10 ** np.linspace(-4, -0.1), size=n_experiments).tolist(),
        # powers of two from 8 to 128
        'batch_size': np.random.choice([2 ** i for i in range(3, 8)], size=n_experiments).tolist(),
        'input_dim': [48] * n_experiments,
        'layers': generate_layer_settings(n_experiments),
        'dropout': np.random.uniform(0, 0.5, size=n_experiments).tolist(),
    }
    # Train only on normal samples (label == 0); evaluate on the full test set.
    evaluated_hyperparams = random_search(
        (x_train[y_train == 0], x_test, None, y_test), model, params)
    return evaluated_hyperparams
def train_autoencoder(x_train: Dict, x_test: Dict, y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Evaluate AutoEncoder hyperparameter settings loaded from a JSON file.

    Builds mean-pooled TF-IDF features, resolves the labels against the data
    keys, and runs ``random_search`` over the experiment configurations stored
    in ``AE-hyperparameters-Drain3-HDFS1.json``.
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')

    y_train = get_labels_from_csv(y_train, x_train.keys())
    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = extractor.fit_transform(x_train)
    x_test = extractor.transform(x_test)

    ae = AutoEncoder()
    experiments = load_experiment('../../models/AE-hyperparameters-Drain3-HDFS1.json')
    # train on normal (label == 0) samples only; test on the full split
    search_data = (x_train[y_train == 0], x_test, None, y_test)
    return random_search(search_data, ae, experiments)
def evaluate_lof(x_train: Dict, x_test: Dict, y_test: pd.DataFrame) -> Dict:
    """Score previously trained LOF baseline experiments on the test set.

    Args:
        x_train: mapping of block id -> raw log data, used only to fit the
            feature extractor.
        x_test: mapping of block id -> raw log data to score.
        y_test: raw labels; resolved against ``x_test.keys()``.

    Returns:
        Score dict produced by ``evaluate_unsupervised``.
    """
    fe = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_test = get_labels_from_csv(y_test, x_test.keys())
    # Fit the extractor on training data; the transformed training matrix
    # itself is intentionally discarded — only the test features are scored.
    fe.fit_transform(x_train)
    x_test = fe.transform(x_test)

    training_stats = load_experiment('../../models/lof_baseline/experiments.json')
    score = evaluate_unsupervised(x_test, y_test, training_stats['experiments'])
    return score
def train_iso_forest(x_train: Dict, x_test: Dict, y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Random-search Isolation Forest settings loaded from a JSON file.

    Fits mean-pooled TF-IDF features on the training split, transforms the
    test split, and evaluates the stored experiment configurations with
    ``random_search_unsupervised``. ``y_train`` is accepted for signature
    parity with the other trainers but is not used.
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')

    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = extractor.fit_transform(x_train)
    x_test = extractor.transform(x_test)

    forest = IsolationForest(bootstrap=True, n_jobs=os.cpu_count(), random_state=SEED)
    experiments = load_experiment('../../models/IF-hyperparameters-Drain3-HDFS1.json')
    return random_search_unsupervised((x_train, x_test, None, y_test), forest, experiments)
def train_lof(x_train: Dict, x_test: Dict, y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Random-search Local Outlier Factor settings loaded from a JSON file.

    The feature extractor is fitted on the training split (the transformed
    training matrix is intentionally discarded), and only the transformed
    test split is passed to ``random_search_unsupervised`` — the training
    slot of the data tuple is ``None``. ``y_train`` is accepted for signature
    parity with the other trainers but is not used.
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')

    y_test = get_labels_from_csv(y_test, x_test.keys())
    extractor.fit_transform(x_train)
    x_test = extractor.transform(x_test)

    lof = LocalOutlierFactor(n_jobs=os.cpu_count())
    experiments = load_experiment('../../models/LOF-hyperparameters-Drain3-HDFS1.json')
    return random_search_unsupervised((None, x_test, None, y_test), lof, experiments)