def test_tf1_normal_data_on_tf2_transformer_model_original(
        mdl: TransformODModel, transformer, dataset_name='hits-4-c',
        tf_version='tf1', epochs=None):
    """Fit a TF2 transform-OD model on TF1-exported normal data.

    Loads the pickled (train, val, test) splits dumped by the TF1
    pipeline, keeps only label-1 samples for train/val, fits ``mdl`` and
    returns the four ROC-AUC metrics computed by ``evaluate_od``:
    (dirichlet, matrix_trace, entropy, cross_entropy).
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    data_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    for required_dir in (data_dir, os.path.join(results_dir, dataset_name)):
        utils.check_path(required_dir)
    # Splits previously serialized by the TF1 loading test.
    pickle_path = os.path.join(
        data_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = pd.read_pickle(
        pickle_path)
    # Keep inliers only (label == 1) for fitting; note the labels are
    # compared un-flattened here, unlike sibling tests — presumably they
    # are already 1-D in this pickle.
    x_train = x_train[y_train == 1]
    x_val = x_val[y_val == 1]
    if epochs is None:
        # Default schedule scales inversely with the transform count.
        epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x_train, x_val, train_batch_size=128, epochs=epochs)
    met_dict = mdl.evaluate_od(
        x_train, x_test, y_test, dataset_name, 'real', x_val)
    metric_keys = (general_keys.DIRICHLET, general_keys.MATRIX_TRACE,
                   general_keys.ENTROPY, general_keys.CROSS_ENTROPY)
    return tuple(met_dict[key]['roc_auc'] for key in metric_keys)
def test_all_tf2(mdl: TransformODModel, transformer, data_loader,
                 dataset_name='hits-4-c', epochs=None):
    """End-to-end TF2 check: fit ``mdl`` on loader-provided splits.

    Returns the (dirichlet, matrix_trace, entropy, cross_entropy)
    ROC-AUC values reported by ``mdl.evaluate_od``.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    data_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    for required_dir in (data_dir, os.path.join(results_dir, dataset_name)):
        utils.check_path(required_dir)
    # No inlier filtering here — the loader's OD splits presumably
    # already contain inliers only in train/val; verify against loader.
    splits = data_loader.get_outlier_detection_datasets()
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = splits
    n_epochs = epochs
    if n_epochs is None:
        # Default schedule scales inversely with the transform count.
        n_epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x_train, x_val, train_batch_size=128, epochs=n_epochs)
    met_dict = mdl.evaluate_od(
        x_train, x_test, y_test, dataset_name, 'real', x_val)
    metric_keys = (general_keys.DIRICHLET, general_keys.MATRIX_TRACE,
                   general_keys.ENTROPY, general_keys.CROSS_ENTROPY)
    return tuple(met_dict[key]['roc_auc'] for key in metric_keys)
def test_tf1_transformed_data_on_tf2_model_original(
        mdl: AlreadyTransformODModel, transformer, dataset_name='hits-4-c',
        single_class_ind=1, tf_version='tf1',
        transformer_name='transtransformed', model_name='resnet',
        epochs=None):
    """Fit an AlreadyTransformODModel on TF1-pretransformed data.

    Loads both the normal and the already-transformed pickled splits
    exported by the TF1 pipeline, fits the model on (x, transform-label)
    tuples and returns the (dirichlet, matrix_trace, entropy,
    cross_entropy) ROC-AUCs from ``evaluate_od``.

    NOTE(review): ``single_class_ind`` and ``model_name`` are accepted
    but unused in this body.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    data_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(data_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))
    # Load the untransformed splits (only y_test is actually used below,
    # plus shapes for the debug prints).
    normal_data_path = os.path.join(
        data_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = pd.read_pickle(
        normal_data_path)
    # Load the pre-transformed splits; y_*_transform_tf1 hold the
    # transformation index of each replicated sample.
    transformed_data_path = os.path.join(
        data_dir, '%s_data_%s_%s_loading.pkl' % (
            transformer_name, dataset_name, tf_version))
    (x_train_transform_tf1, y_train_transform_tf1), \
        (x_val_transform_tf1, y_val_transform_tf1), \
        (x_test_transform_tf1, y_test_transform_tf1) = pd.read_pickle(
            transformed_data_path)
    # Debug: transformed arrays should be n_transforms times larger.
    print(x_train.shape)
    print(x_train_transform_tf1.shape)
    print(x_test.shape)
    print(x_test_transform_tf1.shape)
    batch_size = 128
    if epochs is None:
        # Default schedule scales inversely with the transform count.
        epochs = int(np.ceil(200 / transformer.n_transforms))
    # This model variant takes (data, transform_label) tuples directly.
    mdl.fit((x_train_transform_tf1, y_train_transform_tf1),
            (x_val_transform_tf1, y_val_transform_tf1),
            train_batch_size=batch_size, epochs=epochs)
    met_dict = mdl.evaluate_od(
        (x_train_transform_tf1, y_train_transform_tf1),
        (x_test_transform_tf1, y_test_transform_tf1), y_test, dataset_name,
        'real', (x_val_transform_tf1, y_val_transform_tf1))
    # Reference results recorded by the original author (kept verbatim).
    """
    roc_auc
    dirichlet 0.9896582500000001
    matrix_trace 0.9541035
    entropy 0.9820515000000001
    cross_entropy 0.9614397499999999
    mutual_information 0.9889197499999999
    """
    return met_dict[general_keys.DIRICHLET]['roc_auc'], \
        met_dict[general_keys.MATRIX_TRACE]['roc_auc'], \
        met_dict[general_keys.ENTROPY]['roc_auc'], \
        met_dict[general_keys.CROSS_ENTROPY]['roc_auc']
def _get_template_save_path(self) -> str:
    """Build the canonical save path for this generated dataset variant.

    The path encodes name, seed, crop size and channel count, so it can
    be used to check whether the file was already generated. The parent
    directory is created if missing.
    """
    variant_tag = 'generated_%s/seed%i_crop%s_nChannels%i' % (
        self.name, self.random_seed, str(self.crop_size),
        len(self.used_channels))
    save_path = utils.add_text_to_beginning_of_file_path(
        self.data_path, variant_tag)
    parent_dir = os.path.dirname(os.path.abspath(save_path))
    utils.check_path(parent_dir)
    return save_path
def _run_in_own_process(target, args, n_runs):
    """Execute ``target(*args)`` ``n_runs`` times, each in a fresh subprocess.

    Each repetition runs in its own process so framework state
    (e.g. GPU memory held by TF/Keras) is released between runs.
    Runs are sequential: every process is joined before the next starts.
    """
    for _ in range(n_runs):
        process = Process(target=target, args=args)
        process.start()
        process.join()


def run_experiments(load_dataset_fn, dataset_name, q, class_idx, n_runs):
    """Run the transformation-based OD experiments ``n_runs`` times each.

    Results are written under RESULTS_DIR/<dataset_name> (created if
    missing).

    NOTE(review): the original file also carried five disabled baselines
    (commented out); re-enable through ``_run_in_own_process`` if needed:
      - _kernal_plus_transformations_experiment(load_dataset_fn,
        dataset_name, class_idx, q)
      - _mo_gaal_experiment(load_dataset_fn, dataset_name, class_idx)
      - _if_experiment(load_dataset_fn, dataset_name, class_idx)
      - _cae_ocsvm_experiment(load_dataset_fn, dataset_name, class_idx, q)
      - _raw_ocsvm_experiment(load_dataset_fn, dataset_name, class_idx)
    """
    check_path(os.path.join(RESULTS_DIR, dataset_name))
    # Transformations
    _run_in_own_process(
        _transformations_experiment,
        (load_dataset_fn, dataset_name, class_idx, q), n_runs)
    # Trans-Transformations
    _run_in_own_process(
        _trans_transformations_experiment,
        (load_dataset_fn, dataset_name, class_idx, q), n_runs)
def check_best_model_save(self, iteration):
    """Track the best validation loss seen so far; checkpoint on improvement.

    On iteration 0 the checkpoint path is set up and the current weights
    are saved unconditionally. On every later call the not-improved
    counter is incremented first, then reset (and weights re-saved) if
    the current eval loss beats the best one recorded.
    """
    if iteration == 0:
        weights_dir = os.path.join(PROJECT_PATH, constants.RESULTS,
                                   'aux_weights')
        utils.check_path(weights_dir)
        self.best_model_weights_path = os.path.join(
            weights_dir, 'best_weights.ckpt')
        self.save_weights(self.best_model_weights_path)
        return
    # Pessimistically count this evaluation as "no improvement"; undone
    # below if the loss actually improved.
    self.best_model_so_far[general_keys.NOT_IMPROVED_COUNTER] += 1
    if self.eval_loss.result() < self.best_model_so_far[general_keys.LOSS]:
        self.best_model_so_far[general_keys.LOSS] = self.eval_loss.result()
        self.best_model_so_far[general_keys.NOT_IMPROVED_COUNTER] = 0
        self.best_model_so_far[general_keys.ITERATION] = iteration
        self.save_weights(self.best_model_weights_path)
        if self.verbose:
            print("\nNew best validation model: %s %.4f @ it %d\n" % (
                general_keys.LOSS,
                self.best_model_so_far[general_keys.LOSS],
                self.best_model_so_far[general_keys.ITERATION]), flush=True)
def test_tf1_transformed_data_on_tf2_keras_model_diri(
        mdl: tf.keras.Model, transformer, dataset_name='hits-4-c',
        single_class_ind=1, tf_version='tf1',
        transformer_name='transtransformed', model_name='resnet',
        epochs=None):
    """Train a plain Keras classifier on TF1-pretransformed data and
    score test samples with a per-transform Dirichlet fit.

    Returns four ROC-AUCs: (dirichlet, trace, entropy, cross-entropy).
    NOTE(review): ``model_name`` is accepted but unused in this body.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    data_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(data_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))
    # Load untransformed splits; only y_test (labels) and shapes for the
    # debug prints are used from here.
    normal_data_path = os.path.join(
        data_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = pd.read_pickle(
        normal_data_path)
    # Load pre-transformed splits; y_*_transform_tf1 hold, per replicated
    # sample, the index of the transformation applied to it.
    transformed_data_path = os.path.join(
        data_dir, '%s_data_%s_%s_loading.pkl' % (
            transformer_name, dataset_name, tf_version))
    (x_train_transform_tf1, y_train_transform_tf1), \
        (x_val_transform_tf1, y_val_transform_tf1), \
        (x_test_transform_tf1, y_test_transform_tf1) = pd.read_pickle(
            transformed_data_path)
    # Debug: transformed arrays should be n_transforms times larger.
    print(x_train.shape)
    print(x_train_transform_tf1.shape)
    print(x_test.shape)
    print(x_test_transform_tf1.shape)
    batch_size = 128
    if epochs is None:
        # Default schedule scales inversely with the transform count.
        epochs = int(np.ceil(200 / transformer.n_transforms))
    # Standard Keras training: predict which transformation was applied.
    mdl.fit(x_train_transform_tf1, to_categorical(y_train_transform_tf1),
            validation_data=(x_val_transform_tf1,
                             to_categorical(y_val_transform_tf1)),
            batch_size=batch_size, epochs=epochs)
    # scores: one Dirichlet normality score per (untransformed) test sample.
    scores = np.zeros((len(x_test),))
    # matrix_evals[i, :, t]: softmax output for test sample i under
    # transformation t (len(x_test) x n_transforms x n_transforms).
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    x_pred_train = mdl.predict(x_train_transform_tf1, batch_size=1024)
    x_pred_test = mdl.predict(x_test_transform_tf1, batch_size=1024)
    print(x_pred_train.shape)
    print(x_pred_test.shape)
    for t_ind in range(transformer.n_transforms):
        # Train predictions for samples that underwent transform t_ind;
        # fit a Dirichlet to them by fixed-point MLE.
        ind_x_pred_equal_to_t_ind = np.where(y_train_transform_tf1 == t_ind)[0]
        observed_dirichlet = x_pred_train[ind_x_pred_equal_to_t_ind]
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        # Score the test predictions for the same transform under that fit.
        ind_x_pred_test_equal_to_t_ind = np.where(
            y_test_transform_tf1 == t_ind)[0]
        x_test_p = x_pred_test[ind_x_pred_test_equal_to_t_ind]
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)
    # Average over transformations.
    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    # Alternative scores derived from the prediction matrix.
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind
    # Reference timing/results recorded by the original author (verbatim).
    """
    Time test_model_original(transformer, load_hits4c,
        dataset_name='hits-4-c', tf_version='tf1') 00:04:31.65
    (0.992217, 0.9895665, 0.99131725, 0.989478125)
    (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
    """
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
        get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)
def test_model_loading(transformer, mdl, loader, dataset_name='hits-4-c',
                       single_class_ind=1, tf_version='tf1',
                       transformer_name='transformed', model_name='resnet',
                       epochs=None):
    """Train ``mdl`` on transformed data, caching datasets as pickles.

    Both the normal and the transformed splits are loaded from pickle
    when present, otherwise computed (via ``loader`` / ``transformer``)
    and saved, so repeated runs skip the expensive transformation step.
    Scores test data with a per-transform Dirichlet fit, writes four
    result files plus the model weights, and returns four ROC-AUCs:
    (dirichlet, trace, entropy, cross-entropy).
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(results_dir)
    utils.check_path(save_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))
    # Load-or-compute the normal (untransformed) splits.
    normal_data_path = os.path.join(
        save_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    if os.path.exists(normal_data_path):
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = pd.read_pickle(
            normal_data_path)
    else:
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
            return_val=True)
        normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
        utils.save_pickle(normal_data, normal_data_path)
    # Model is now injected by the caller; original in-place construction
    # kept for reference:
    # n, k = (10, 4)
    # mdl = create_wide_residual_network(
    #     x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])
    # Select inliers of the target class for train/val.
    x_train = x_train[y_train.flatten() == single_class_ind]
    x_val = x_val[y_val.flatten() == single_class_ind]
    # Load-or-compute the transformed splits. y_*_transform_tf1 hold the
    # transformation index of each replicated sample.
    transformed_data_path = os.path.join(
        save_dir, '%s_data_%s_%s_loading.pkl' % (
            transformer_name, dataset_name, tf_version))
    if os.path.exists(transformed_data_path):
        (x_train_transform_tf1, y_train_transform_tf1), \
            (x_val_transform_tf1, y_val_transform_tf1), \
            (x_test_transform_tf1, y_test_transform_tf1) = pd.read_pickle(
                transformed_data_path)
    else:
        # Each sample is repeated n_transforms times and each copy gets
        # one transformation; labels are the transformation indices.
        y_train_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                        len(x_train))
        x_train_transform_tf1 = transformer.transform_batch(
            np.repeat(x_train, transformer.n_transforms, axis=0),
            y_train_transform_tf1)
        y_val_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                      len(x_val))
        x_val_transform_tf1 = transformer.transform_batch(
            np.repeat(x_val, transformer.n_transforms, axis=0),
            y_val_transform_tf1)
        y_test_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                       len(x_test))
        x_test_transform_tf1 = transformer.transform_batch(
            np.repeat(x_test, transformer.n_transforms, axis=0),
            y_test_transform_tf1)
        transformed_data = ((x_train_transform_tf1, y_train_transform_tf1),
                            (x_val_transform_tf1, y_val_transform_tf1),
                            (x_test_transform_tf1, y_test_transform_tf1))
        utils.save_pickle(transformed_data, transformed_data_path)
    # Debug: transformed arrays should be n_transforms times larger.
    print(x_train.shape)
    print(x_train_transform_tf1.shape)
    print(x_test.shape)
    print(x_test_transform_tf1.shape)
    batch_size = 128
    if epochs is None:
        # Default schedule scales inversely with the transform count.
        epochs = int(np.ceil(200 / transformer.n_transforms))
    # Train to predict which transformation was applied.
    mdl.fit(x=x_train_transform_tf1, y=to_categorical(y_train_transform_tf1),
            batch_size=batch_size, epochs=epochs)
    # scores: one Dirichlet normality score per (untransformed) test sample.
    scores = np.zeros((len(x_test),))
    # matrix_evals[i, :, t]: softmax output for test sample i under
    # transformation t.
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    x_pred_train = mdl.predict(x_train_transform_tf1, batch_size=1024)
    x_pred_test = mdl.predict(x_test_transform_tf1, batch_size=1024)
    print(x_pred_train.shape)
    print(x_pred_test.shape)
    for t_ind in range(transformer.n_transforms):
        # Fit a Dirichlet (fixed-point MLE) to the train predictions of
        # samples that underwent transform t_ind, then score the
        # matching test predictions under that fit.
        ind_x_pred_equal_to_t_ind = np.where(y_train_transform_tf1 == t_ind)[0]
        observed_dirichlet = x_pred_train[ind_x_pred_equal_to_t_ind]
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        ind_x_pred_test_equal_to_t_ind = np.where(
            y_test_transform_tf1 == t_ind)[0]
        x_test_p = x_pred_test[ind_x_pred_test_equal_to_t_ind]
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)
    # Average over transformations.
    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    # Alternative scores derived from the prediction matrix.
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind
    # Persist each score variant for later aggregation.
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores, labels=labels,
                      experiment_name='%s-%s-loading-%s' % (
                          model_name, transformer_name, tf_version))
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_simple, labels=labels,
                      experiment_name='%s-%s-simple-loading-%s' % (
                          model_name, transformer_name, tf_version))
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_entropy, labels=labels,
                      experiment_name='%s-%s-entropy-loading-%s' % (
                          model_name, transformer_name, tf_version))
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_xH, labels=labels,
                      experiment_name='%s-%s-xH-loading-%s' % (
                          model_name, transformer_name, tf_version))
    mdl_weights_name = '{}_{}_{}_{}_loading_{}_weights.h5'.format(
        model_name, transformer_name, dataset_name, tf_version,
        get_class_name_from_index(single_class_ind, dataset_name))
    mdl_weights_path = os.path.join(results_dir, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
    # Re-initialize model weights so the next invocation starts fresh.
    reset_weights()
    # Reference timing/results recorded by the original author (verbatim).
    """
    Time test_model_original(transformer, load_hits4c,
        dataset_name='hits-4-c', tf_version='tf1') 00:04:31.65
    (0.992217, 0.9895665, 0.99131725, 0.989478125)
    (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
    """
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
        get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)
def test_model_original(transformer, loader, dataset_name='hits-4-c',
                        single_class_ind=1):
    """Replicate the original transformation-based OD pipeline end to end.

    Builds a wide residual network, trains it to classify which
    geometric transformation was applied to inliers of class
    ``single_class_ind``, scores test data via per-transform Dirichlet
    fits, writes result files plus model weights, and returns four
    ROC-AUCs: (dirichlet, trace, entropy, cross-entropy).
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(results_dir)
    utils.check_path(save_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))
    # Load the splits and persist them for other tests to reuse.
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
        return_val=True)
    normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
    utils.save_pickle(
        normal_data,
        os.path.join(save_dir,
                     'normal_data_%s_tf1_original.pkl' % dataset_name))
    # Wide residual network, depth factor n=10, widen factor k=4; one
    # output per transformation.
    n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])
    # Keep only inliers of the target class for training.
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    print(x_train_task.shape)
    # Replicate each inlier n_transforms times and apply one
    # transformation per copy; labels are the transformation indices.
    transformations_inds = np.tile(np.arange(transformer.n_transforms),
                                   len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)
    print(x_train_task_transformed.shape)
    batch_size = 128
    # Epochs scale inversely with the transform count — presumably to
    # keep the total number of updates comparable; confirm with author.
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=batch_size,
            epochs=int(np.ceil(200 / transformer.n_transforms)))
    # scores: one Dirichlet normality score per test sample.
    scores = np.zeros((len(x_test),))
    # matrix_evals[i, :, t]: softmax output for test sample i under
    # transformation t.
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        # Unlike the pre-transformed variants, transforms are applied
        # on the fly here, per transformation index.
        observed_dirichlet = mdl.predict(transformer.transform_batch(
            observed_data, [t_ind] * len(observed_data)), batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        # Fixed-point Dirichlet MLE seeded from the moment-matched alpha.
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)), batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)
    # Average over transformations.
    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    # Alternative scores derived from the prediction matrix.
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind
    # Persist each score variant for later aggregation.
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores, labels=labels,
                      experiment_name='transformations')
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_simple, labels=labels,
                      experiment_name='transformations-simple')
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_entropy, labels=labels,
                      experiment_name='transformations-entropy')
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_xH, labels=labels,
                      experiment_name='transformations-xH')
    mdl_weights_name = '{}_tf1_original_{}_weights.h5'.format(
        dataset_name, get_class_name_from_index(single_class_ind,
                                                dataset_name))
    mdl_weights_path = os.path.join(results_dir, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
    # Reference timings/results recorded by the original author (verbatim).
    """
    Time test_model_original(transformer, load_hits4c,
        dataset_name='hits-4-c') 00:06:58.37
    (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
    (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
    (0.9912172499999999, 0.9883357499999998, 0.9909070000000001,
     0.9886706249999999)
    #train only
    Time test_model_original(transformer, load_hits4c,
        dataset_name='hits-4-c', tf_version='tf1') 00:03:48.29
    """
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
        get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)