matrix_scores_val /= transformer.n_transforms labels = y_test.flatten() == single_class_ind # plot_matrix_score(x_test, matrix_scores, labels, plot_inliers=True, # n_to_plot=5) # plot_matrix_score(x_test, matrix_scores, labels, plot_inliers=False, # n_to_plot=5) plot_histogram_disc_loss_acc_thr( np.trace(matrix_scores_test[labels], axis1=1, axis2=2), np.trace(matrix_scores_test[~labels], axis1=1, axis2=2), path='../results', x_label_name='TransTransformations_matrixTrace_hits', val_inliers_score=np.trace(matrix_scores_val)) entropy_scores_test = get_entropy(matrix_scores_test) entropy_scores_val = get_entropy(matrix_scores_val) plot_histogram_disc_loss_acc_thr( entropy_scores_test[labels], entropy_scores_test[~labels], path='../results', x_label_name='TransTransformations_entropy_scores_hits', val_inliers_score=entropy_scores_val) ## Get logits for xentropy # Get matrix scores matrix_scores_raw_test = np.zeros( (len(x_test), transformer.n_transforms, transformer.n_transforms)) for t_ind in tqdm(range(transformer.n_transforms)): test_specific_transform_indxs = np.where( transformations_inds_test == t_ind)
def test_model_original(transformer, loader, dataset_name='hits-4-c',
                        single_class_ind=1):
    """Train a transformation classifier from scratch and score the test set.

    The data returned by ``loader`` is pickled under ``tests/aux_data``, a
    wide residual network is built and fitted to predict which transformation
    was applied to inlier training samples, and four score vectors are
    computed for the test set and written with ``save_results_file``. The
    trained weights are saved under ``tests/aux_results/<dataset_name>``.

    Args:
        transformer: object exposing ``n_transforms`` and
            ``transform_batch(x, transformation_indices)``.
        loader: callable accepting ``return_val=True`` and returning
            ``(x_train, y_train), (x_val, y_val), (x_test, y_test)``.
        dataset_name: name used in output file names and forwarded to the
            result/weight naming helpers.
        single_class_ind: label value whose samples are treated as inliers.

    Returns:
        Tuple of four ``get_roc_auc`` values, in order: Dirichlet score,
        matrix-trace score, negated entropy score, negated ``get_xH`` score.
    """
    n_transforms = transformer.n_transforms

    # Output locations (created on demand by utils.check_path).
    aux_root = os.path.join(PROJECT_PATH, 'tests')
    results_dir = os.path.join(aux_root, 'aux_results')
    save_dir = os.path.join(aux_root, 'aux_data')
    for directory in (results_dir, save_dir,
                      os.path.join(results_dir, dataset_name)):
        utils.check_path(directory)

    # Load the splits and keep a pickled copy of exactly what was used.
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
        return_val=True)
    pickled_name = 'normal_data_%s_tf1_original.pkl' % dataset_name
    utils.save_pickle(((x_train, y_train), (x_val, y_val), (x_test, y_test)),
                      os.path.join(save_dir, pickled_name))

    # Build the classifier: one output class per transformation.
    wrn_n, wrn_k = 10, 4
    mdl = create_wide_residual_network(x_train.shape[1:], n_transforms,
                                       wrn_n, wrn_k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Keep only the inlier class for training.
    inliers = x_train[y_train.flatten() == single_class_ind]
    print(inliers.shape)

    # Every inlier is repeated n_transforms times and paired with the
    # transformation indices 0..n_transforms-1.
    train_transform_inds = np.tile(np.arange(n_transforms), len(inliers))
    inliers_transformed = transformer.transform_batch(
        np.repeat(inliers, n_transforms, axis=0), train_transform_inds)
    print(inliers_transformed.shape)

    mdl.fit(x=inliers_transformed,
            y=to_categorical(train_transform_inds),
            batch_size=128,
            epochs=int(np.ceil(200 / n_transforms)))

    n_test = len(x_test)
    scores = np.zeros(n_test)
    matrix_evals = np.zeros((n_test, n_transforms, n_transforms))
    for t_ind in range(n_transforms):
        # Fit Dirichlet parameters on the model outputs for inliers under
        # transformation t_ind.
        train_probs = mdl.predict(
            transformer.transform_batch(inliers, [t_ind] * len(inliers)),
            batch_size=1024)
        mean_log_probs = np.log(train_probs).mean(axis=0)
        alpha_sum = calc_approx_alpha_sum(train_probs)
        alpha_init = train_probs.mean(axis=0) * alpha_sum
        alpha_mle = fixed_point_dirichlet_mle(alpha_init, mean_log_probs)

        # Score the test set under the same transformation.
        test_probs = mdl.predict(
            transformer.transform_batch(x_test, [t_ind] * n_test),
            batch_size=1024)
        matrix_evals[:, :, t_ind] += test_probs
        scores += dirichlet_normality_score(alpha_mle, test_probs)

    scores /= n_transforms
    matrix_evals /= n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    all_scores = (
        ('transformations', scores),
        ('transformations-simple', scores_simple),
        ('transformations-entropy', scores_entropy),
        ('transformations-xH', scores_xH),
    )
    for experiment_name, score_values in all_scores:
        save_results_file(results_dir, dataset_name, single_class_ind,
                          scores=score_values, labels=labels,
                          experiment_name=experiment_name)

    weights_file = '{}_tf1_original_{}_weights.h5'.format(
        dataset_name,
        get_class_name_from_index(single_class_ind, dataset_name))
    mdl.save_weights(os.path.join(results_dir, dataset_name, weights_file))

    # Reference runs kept from the original notes (the tuples are presumably
    # the four values returned below -- TODO confirm):
    #   Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c')
    #   00:06:58.37
    #   (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
    #   (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
    #   (0.9912172499999999, 0.9883357499999998, 0.9909070000000001,
    #    0.9886706249999999)
    #   #train only
    #   Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c', tf_version='tf1')
    #   00:03:48.29
    return tuple(get_roc_auc(score_values, labels)
                 for _, score_values in all_scores)
def test_model_loading(transformer, mdl, loader, dataset_name='hits-4-c',
                       single_class_ind=1, tf_version='tf1',
                       transformer_name='transformed', model_name='resnet',
                       epochs=None):
    """Train ``mdl`` on cached, pre-transformed data and score the test set.

    Raw splits and their transformed versions are cached as pickles under
    ``tests/aux_data`` and reused when the files already exist. ``mdl`` is
    compiled and fitted to predict the transformation index of transformed
    inlier samples, four score vectors are computed for the test set and
    written with ``save_results_file``, the weights are saved under
    ``tests/aux_results/<dataset_name>`` and ``reset_weights()`` is called.

    Args:
        transformer: object exposing ``n_transforms`` and
            ``transform_batch(x, transformation_indices)``.
        mdl: model object providing ``compile``, ``fit``, ``predict`` and
            ``save_weights``.
        loader: callable accepting ``return_val=True`` and returning
            ``(x_train, y_train), (x_val, y_val), (x_test, y_test)``; only
            called when the raw-data cache file is missing.
        dataset_name: name used in cache, result and weight file names.
        single_class_ind: label value whose samples are treated as inliers.
        tf_version: tag embedded in cache, result and weight file names.
        transformer_name: tag embedded in the transformed-data cache name and
            in result/weight file names.
        model_name: tag embedded in result and weight file names.
        epochs: number of training epochs; when ``None`` it defaults to
            ``ceil(200 / transformer.n_transforms)``.

    Returns:
        Tuple of four ``get_roc_auc`` values, in order: Dirichlet score,
        matrix-trace score, negated entropy score, negated ``get_xH`` score.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(results_dir)
    utils.check_path(save_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))

    # Load the raw splits from the cache, or create the cache.
    normal_data_path = os.path.join(
        save_dir,
        'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    if os.path.exists(normal_data_path):
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = \
            pd.read_pickle(normal_data_path)
    else:
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
            return_val=True)
        normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
        utils.save_pickle(normal_data, normal_data_path)

    # The model is supplied by the caller; it only needs compiling here.
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Select inliers.
    x_train = x_train[y_train.flatten() == single_class_ind]
    x_val = x_val[y_val.flatten() == single_class_ind]

    # Load the transformed data from the cache, or create the cache.
    # The transformed train/val arrays only contain samples of
    # `single_class_ind`, so the class index must be part of the cache key;
    # otherwise a later call with a different class would silently reuse
    # another class's inliers for training and for the Dirichlet fit.
    transformed_data_path = os.path.join(
        save_dir,
        '%s_data_%s_%s_class-%s_loading.pkl' % (
            transformer_name, dataset_name, tf_version, single_class_ind))
    if os.path.exists(transformed_data_path):
        ((x_train_transform_tf1, y_train_transform_tf1),
         (x_val_transform_tf1, y_val_transform_tf1),
         (x_test_transform_tf1, y_test_transform_tf1)) = pd.read_pickle(
            transformed_data_path)
    else:
        # Each sample is repeated n_transforms times and paired with the
        # transformation indices 0..n_transforms-1.
        y_train_transform_tf1 = np.tile(
            np.arange(transformer.n_transforms), len(x_train))
        x_train_transform_tf1 = transformer.transform_batch(
            np.repeat(x_train, transformer.n_transforms, axis=0),
            y_train_transform_tf1)
        y_val_transform_tf1 = np.tile(
            np.arange(transformer.n_transforms), len(x_val))
        x_val_transform_tf1 = transformer.transform_batch(
            np.repeat(x_val, transformer.n_transforms, axis=0),
            y_val_transform_tf1)
        y_test_transform_tf1 = np.tile(
            np.arange(transformer.n_transforms), len(x_test))
        x_test_transform_tf1 = transformer.transform_batch(
            np.repeat(x_test, transformer.n_transforms, axis=0),
            y_test_transform_tf1)
        transformed_data = (
            (x_train_transform_tf1, y_train_transform_tf1),
            (x_val_transform_tf1, y_val_transform_tf1),
            (x_test_transform_tf1, y_test_transform_tf1))
        utils.save_pickle(transformed_data, transformed_data_path)
    print(x_train.shape)
    print(x_train_transform_tf1.shape)
    print(x_test.shape)
    print(x_test_transform_tf1.shape)

    # Train the model.
    batch_size = 128
    if epochs is None:
        epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x=x_train_transform_tf1,
            y=to_categorical(y_train_transform_tf1),
            batch_size=batch_size,
            epochs=epochs)

    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    # Predict once on all transformed samples, then slice per transformation.
    x_pred_train = mdl.predict(x_train_transform_tf1, batch_size=1024)
    x_pred_test = mdl.predict(x_test_transform_tf1, batch_size=1024)
    print(x_pred_train.shape)
    print(x_pred_test.shape)
    for t_ind in range(transformer.n_transforms):
        # Fit Dirichlet parameters on the inlier outputs for this transform.
        ind_x_pred_equal_to_t_ind = np.where(
            y_train_transform_tf1 == t_ind)[0]
        observed_dirichlet = x_pred_train[ind_x_pred_equal_to_t_ind]
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Score the test outputs for the same transform.
        ind_x_pred_test_equal_to_t_ind = np.where(
            y_test_transform_tf1 == t_ind)[0]
        x_test_p = x_pred_test[ind_x_pred_test_equal_to_t_ind]
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    name_parts = (model_name, transformer_name, tf_version)
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores, labels=labels,
                      experiment_name='%s-%s-loading-%s' % name_parts)
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_simple, labels=labels,
                      experiment_name='%s-%s-simple-loading-%s' % name_parts)
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_entropy, labels=labels,
                      experiment_name='%s-%s-entropy-loading-%s' % name_parts)
    save_results_file(results_dir, dataset_name, single_class_ind,
                      scores=scores_xH, labels=labels,
                      experiment_name='%s-%s-xH-loading-%s' % name_parts)

    mdl_weights_name = '{}_{}_{}_{}_loading_{}_weights.h5'.format(
        model_name, transformer_name, dataset_name, tf_version,
        get_class_name_from_index(single_class_ind, dataset_name))
    mdl_weights_path = os.path.join(results_dir, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
    reset_weights()

    # Reference run kept from the original notes (the tuples are presumably
    # the four values returned below -- TODO confirm):
    #   Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c', tf_version='tf1')
    #   00:04:31.65
    #   (0.992217, 0.9895665, 0.99131725, 0.989478125)
    #   (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
        get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)
def _kernal_plus_transformations_experiment(dataset_load_fn, dataset_name,
                                            single_class_ind, gpu_q):
    """Run the kernel-plus-transformations experiment for one inlier class.

    A wide residual network is trained to predict which
    ``PlusKernelTransformer`` transformation was applied to inlier training
    samples. Four score vectors are then computed for the test set and
    written with ``save_results_file``, and the trained weights are saved
    under ``RESULTS_DIR/<dataset_name>`` with a timestamped file name.

    Args:
        dataset_load_fn: callable returning
            ``(x_train, y_train), (x_test, y_test)``.
        dataset_name: name used in result and weight file names.
        single_class_ind: label value whose samples are treated as inliers.
        gpu_q: currently unused; the GPU-pinning code that consumed it is
            commented out below.

    Raises:
        ValueError: if ``dataset_name`` is ``'cats-vs-dogs'``, for which no
            transformer is configured.
    """
    # The original code set ``transformer = None`` for this dataset and then
    # crashed with an AttributeError on ``transformer.n_transforms``; fail
    # early with an explicit message instead, before loading any data.
    if dataset_name in ['cats-vs-dogs']:
        raise ValueError(
            'No transformer is configured for dataset %r' % (dataset_name,))

    # gpu_to_use = gpu_q.get()
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use

    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    transformer = PlusKernelTransformer(translation_x=8, translation_y=8,
                                        rotations=1, flips=1, gauss=1, log=1)
    n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Get inliers of the specific class.
    x_train_task = x_train[y_train.flatten() == single_class_ind]

    # [0_i, ..., (N_transforms-1)_i, ..., ..., 0_N_samples, ...,
    # (N_transforms-1)_N_samples] shape: (N_transforms*N_samples,)
    transformations_inds = np.tile(np.arange(transformer.n_transforms),
                                   len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)
    batch_size = 128

    mdl.fit(
        x=x_train_task_transformed,
        y=to_categorical(transformations_inds),
        batch_size=batch_size,
        # The full schedule is disabled in the original code:
        # int(np.ceil(200/transformer.n_transforms))
        epochs=2
    )

    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        # Fit Dirichlet parameters on the model outputs for inliers under
        # transformation t_ind.
        observed_dirichlet = mdl.predict(
            transformer.transform_batch(observed_data,
                                        [t_ind] * len(observed_data)),
            batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Score the test set under the same transformation.
        x_test_p = mdl.predict(
            transformer.transform_batch(x_test, [t_ind] * len(x_test)),
            batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # Pass the results directory first, as every other call site in this
    # file does; the original calls here omitted it.
    save_results_file(RESULTS_DIR, dataset_name, single_class_ind,
                      scores=scores, labels=labels,
                      experiment_name='kernel-plus-transformations')
    save_results_file(RESULTS_DIR, dataset_name, single_class_ind,
                      scores=scores_simple, labels=labels,
                      experiment_name='kernel-plus-transformations-simple')
    save_results_file(RESULTS_DIR, dataset_name, single_class_ind,
                      scores=scores_entropy, labels=labels,
                      experiment_name='kernel-plus-transformations-entropy')
    save_results_file(RESULTS_DIR, dataset_name, single_class_ind,
                      scores=scores_xH, labels=labels,
                      experiment_name='kernel-plus-transformations-xH')

    mdl_weights_name = \
        '{}_kernel-plus-transformations_{}_{}_weights.h5'.format(
            dataset_name,
            get_class_name_from_index(single_class_ind, dataset_name),
            datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)