Ejemplo n.º 1
0
def test_tf1_normal_data_on_tf2_transformer_model_original(
        mdl: TransformODModel,
        transformer,
        dataset_name='hits-4-c',
        tf_version='tf1',
        epochs=None):
    """Train ``mdl`` on pickled, untransformed data and return four ROC AUCs.

    The (train, val, test) splits are read from
    ``tests/aux_data/normal_data_<dataset_name>_<tf_version>_loading.pkl``.
    Only samples labelled ``1`` are kept for the train and validation splits;
    the test split is passed to the model unfiltered.

    Args:
        mdl: model whose ``fit`` and ``evaluate_od`` methods are called.
        transformer: object whose ``n_transforms`` sets the default number of
            epochs.
        dataset_name: dataset tag used in file and folder names and forwarded
            to ``evaluate_od``.
        tf_version: tag embedded in the pickle file name.
        epochs: training epochs; when ``None`` it becomes
            ``ceil(200 / transformer.n_transforms)``.

    Returns:
        Tuple with the ``'roc_auc'`` entries of the Dirichlet, matrix-trace,
        entropy and cross-entropy metrics, in that order.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    data_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(data_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))

    # load data
    # NOTE(review): read_pickle unpickles arbitrary objects; only point it at
    # files produced by this project.
    normal_data_path = os.path.join(
        data_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = pd.read_pickle(
        normal_data_path)

    # Keep label-1 samples only. Labels are ravelled first so the mask is
    # always 1-D: a (N, 1) label array would otherwise be treated as a 2-D
    # mask over the first two axes of x instead of selecting whole samples.
    x_train = x_train[np.ravel(y_train) == 1]
    x_val = x_val[np.ravel(y_val) == 1]

    # train model
    batch_size = 128
    if epochs is None:
        epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x_train, x_val, train_batch_size=batch_size, epochs=epochs)
    met_dict = mdl.evaluate_od(x_train, x_test, y_test, dataset_name, 'real',
                               x_val)

    return met_dict[general_keys.DIRICHLET]['roc_auc'], \
           met_dict[general_keys.MATRIX_TRACE]['roc_auc'], \
           met_dict[general_keys.ENTROPY]['roc_auc'], \
           met_dict[general_keys.CROSS_ENTROPY]['roc_auc']
Ejemplo n.º 2
0
def test_all_tf2(mdl: TransformODModel,
                 transformer,
                 data_loader,
                 dataset_name='hits-4-c',
                 epochs=None):
    """Fit ``mdl`` on the loader's splits and return four ROC AUCs.

    ``data_loader.get_outlier_detection_datasets()`` must return three
    ``(x, y)`` pairs (train, val, test). When ``epochs`` is ``None`` it
    defaults to ``ceil(200 / transformer.n_transforms)``.

    Returns:
        Tuple with the ``'roc_auc'`` entries of the Dirichlet, matrix-trace,
        entropy and cross-entropy metrics, in that order.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    # Make sure the auxiliary data / results folders are available.
    utils.check_path(os.path.join(tests_root, 'aux_data'))
    utils.check_path(os.path.join(tests_root, 'aux_results', dataset_name))

    # Fetch the three splits; only the test labels are needed afterwards.
    train_split, val_split, test_split = (
        data_loader.get_outlier_detection_datasets())
    x_train, _ = train_split
    x_val, _ = val_split
    x_test, y_test = test_split

    # Train, then evaluate outlier-detection metrics.
    n_epochs = epochs
    if n_epochs is None:
        n_epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x_train, x_val, train_batch_size=128, epochs=n_epochs)
    metrics = mdl.evaluate_od(x_train, x_test, y_test, dataset_name, 'real',
                              x_val)

    dirichlet_auc = metrics[general_keys.DIRICHLET]['roc_auc']
    trace_auc = metrics[general_keys.MATRIX_TRACE]['roc_auc']
    entropy_auc = metrics[general_keys.ENTROPY]['roc_auc']
    cross_entropy_auc = metrics[general_keys.CROSS_ENTROPY]['roc_auc']
    return dirichlet_auc, trace_auc, entropy_auc, cross_entropy_auc
Ejemplo n.º 3
0
def test_tf1_transformed_data_on_tf2_model_original(
        mdl: AlreadyTransformODModel,
        transformer,
        dataset_name='hits-4-c',
        single_class_ind=1,
        tf_version='tf1',
        transformer_name='transtransformed',
        model_name='resnet',
        epochs=None):
    """Fit ``mdl`` on pickled, already-transformed data and return ROC AUCs.

    Two pickles are read from ``tests/aux_data``: the untransformed splits
    (used for shape printing and for the test labels) and the transformed
    splits, which are handed to ``mdl`` as ``(x, y)`` pairs.
    ``single_class_ind`` and ``model_name`` are accepted but not used here.

    Returns:
        Tuple with the ``'roc_auc'`` entries of the Dirichlet, matrix-trace,
        entropy and cross-entropy metrics, in that order.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    data_dir = os.path.join(tests_root, 'aux_data')
    utils.check_path(data_dir)
    utils.check_path(os.path.join(tests_root, 'aux_results', dataset_name))

    # Untransformed splits.
    normal_file = 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version)
    (x_train, _), (_, _), (x_test, y_test) = pd.read_pickle(
        os.path.join(data_dir, normal_file))

    # Transformed splits.
    transformed_file = '%s_data_%s_%s_loading.pkl' % (
        transformer_name, dataset_name, tf_version)
    (x_tr, y_tr), (x_va, y_va), (x_te, y_te) = pd.read_pickle(
        os.path.join(data_dir, transformed_file))

    for array in (x_train, x_tr, x_test, x_te):
        print(array.shape)

    train_pair = (x_tr, y_tr)
    val_pair = (x_va, y_va)
    test_pair = (x_te, y_te)

    # Train and evaluate.
    n_epochs = epochs
    if n_epochs is None:
        n_epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(train_pair, val_pair, train_batch_size=128, epochs=n_epochs)
    metrics = mdl.evaluate_od(train_pair, test_pair, y_test, dataset_name,
                              'real', val_pair)

    # Reference roc_auc values recorded by the original author:
    #   dirichlet          0.9896582500000001
    #   matrix_trace       0.9541035
    #   entropy            0.9820515000000001
    #   cross_entropy      0.9614397499999999
    #   mutual_information 0.9889197499999999
    dirichlet_auc = metrics[general_keys.DIRICHLET]['roc_auc']
    trace_auc = metrics[general_keys.MATRIX_TRACE]['roc_auc']
    entropy_auc = metrics[general_keys.ENTROPY]['roc_auc']
    cross_entropy_auc = metrics[general_keys.CROSS_ENTROPY]['roc_auc']
    return dirichlet_auc, trace_auc, entropy_auc, cross_entropy_auc
Ejemplo n.º 4
0
 def _get_template_save_path(self) -> str:
     """Return the path of the final saved file, creating its parent folder.

     The path is ``self.data_path`` with a prefix built from the object's
     name, random seed, crop size and number of used channels, so callers can
     check whether that file has already been generated.
     """
     prefix_fields = (self.name, self.random_seed, str(self.crop_size),
                      len(self.used_channels))
     prefix = 'generated_%s/seed%i_crop%s_nChannels%i' % prefix_fields
     target_path = utils.add_text_to_beginning_of_file_path(
         self.data_path, prefix)
     parent_dir = os.path.dirname(os.path.abspath(target_path))
     utils.check_path(parent_dir)
     return target_path
Ejemplo n.º 5
0
def run_experiments(load_dataset_fn, dataset_name, q, class_idx, n_runs):
    """Run the enabled experiments ``n_runs`` times each, one process per run.

    Every run is started in its own ``Process`` and joined before the next
    one begins. All runs of ``_transformations_experiment`` finish before the
    runs of ``_trans_transformations_experiment`` start. Each worker is
    called with ``(load_dataset_fn, dataset_name, class_idx, q)``.
    """
    check_path(os.path.join(RESULTS_DIR, dataset_name))

    # NOTE: the other baselines were commented out in this function and are
    # not executed: _kernal_plus_transformations_experiment,
    # _mo_gaal_experiment, _if_experiment, _cae_ocsvm_experiment and
    # _raw_ocsvm_experiment.

    def _repeat_in_subprocess(experiment_fn):
        # One child process per run, joined before the next run begins.
        for _ in range(n_runs):
            worker = Process(target=experiment_fn,
                             args=(load_dataset_fn, dataset_name, class_idx,
                                   q))
            worker.start()
            worker.join()

    # Transformations
    _repeat_in_subprocess(_transformations_experiment)
    # Trans-Transformations
    _repeat_in_subprocess(_trans_transformations_experiment)
Ejemplo n.º 6
0
 def check_best_model_save(self, iteration):
     """Save the model weights whenever the evaluation loss improves.

     At ``iteration == 0`` the checkpoint folder is prepared, the checkpoint
     path is stored in ``self.best_model_weights_path`` and the current
     weights are saved unconditionally. On later iterations the
     not-improved counter is incremented; if ``self.eval_loss.result()`` is
     below the best recorded loss, the record is updated, the counter is
     reset and the weights are saved again.
     """
     if iteration == 0:
         weights_dir = os.path.join(PROJECT_PATH, constants.RESULTS,
                                    'aux_weights')
         utils.check_path(weights_dir)
         self.best_model_weights_path = os.path.join(weights_dir,
                                                     'best_weights.ckpt')
         self.save_weights(self.best_model_weights_path)
         return

     self.best_model_so_far[general_keys.NOT_IMPROVED_COUNTER] += 1
     improved = (self.eval_loss.result() <
                 self.best_model_so_far[general_keys.LOSS])
     if not improved:
         return

     # New best: record it and checkpoint the weights.
     self.best_model_so_far[general_keys.LOSS] = self.eval_loss.result()
     self.best_model_so_far[general_keys.NOT_IMPROVED_COUNTER] = 0
     self.best_model_so_far[general_keys.ITERATION] = iteration
     self.save_weights(self.best_model_weights_path)
     if not self.verbose:
         return
     report_values = (general_keys.LOSS,
                      self.best_model_so_far[general_keys.LOSS],
                      self.best_model_so_far[general_keys.ITERATION])
     print("\nNew best validation model: %s %.4f @ it %d\n" % report_values,
           flush=True)
Ejemplo n.º 7
0
def test_tf1_transformed_data_on_tf2_keras_model_diri(
        mdl: tf.keras.Model,
        transformer,
        dataset_name='hits-4-c',
        single_class_ind=1,
        tf_version='tf1',
        transformer_name='transtransformed',
        model_name='resnet',
        epochs=None):
    """Train a Keras model on pickled transformed data and return ROC AUCs.

    The model is fitted to predict the transformation index of each
    transformed sample. For every transformation index, parameters are
    fitted with ``fixed_point_dirichlet_mle`` on the train predictions and
    the matching test predictions are scored with
    ``dirichlet_normality_score``. Trace, entropy and ``get_xH`` scores are
    derived from the per-transformation test predictions.
    ``model_name`` is accepted but not used here.

    Returns:
        Tuple of ``get_roc_auc`` values for the Dirichlet, trace, entropy and
        ``get_xH`` scores, in that order, against
        ``y_test.flatten() == single_class_ind``.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    data_dir = os.path.join(tests_root, 'aux_data')
    utils.check_path(data_dir)
    utils.check_path(os.path.join(tests_root, 'aux_results', dataset_name))

    # Untransformed splits.
    normal_file = 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version)
    (x_train, _), (_, _), (x_test, y_test) = pd.read_pickle(
        os.path.join(data_dir, normal_file))

    # Transformed splits.
    transformed_file = '%s_data_%s_%s_loading.pkl' % (
        transformer_name, dataset_name, tf_version)
    (x_tr, y_tr), (x_va, y_va), (x_te, y_te) = pd.read_pickle(
        os.path.join(data_dir, transformed_file))
    for array in (x_train, x_tr, x_test, x_te):
        print(array.shape)

    # Train the model.
    n_epochs = epochs
    if n_epochs is None:
        n_epochs = int(np.ceil(200 / transformer.n_transforms))
    train_targets = to_categorical(y_tr)
    val_targets = to_categorical(y_va)
    mdl.fit(x_tr,
            train_targets,
            validation_data=(x_va, val_targets),
            batch_size=128,
            epochs=n_epochs)

    n_test = len(x_test)
    scores = np.zeros((n_test, ))
    matrix_evals = np.zeros(
        (n_test, transformer.n_transforms, transformer.n_transforms))
    train_preds = mdl.predict(x_tr, batch_size=1024)
    test_preds = mdl.predict(x_te, batch_size=1024)
    print(train_preds.shape)
    print(test_preds.shape)

    for t_ind in range(transformer.n_transforms):
        # Train predictions for samples transformed with t_ind.
        train_rows = np.where(y_tr == t_ind)[0]
        observed = train_preds[train_rows]
        mean_log_p = np.log(observed).mean(axis=0)

        # Initial guess, then fixed-point refinement.
        alpha_sum = calc_approx_alpha_sum(observed)
        alpha_init = observed.mean(axis=0) * alpha_sum
        alpha_mle = fixed_point_dirichlet_mle(alpha_init, mean_log_p)

        # Test predictions for the same transformation.
        test_rows = np.where(y_te == t_ind)[0]
        test_probs = test_preds[test_rows]
        matrix_evals[:, :, t_ind] += test_probs
        scores += dirichlet_normality_score(alpha_mle, test_probs)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # Reference results recorded by the original author:
    #   Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c', tf_version='tf1') 00:04:31.65
    #   (0.992217, 0.9895665, 0.99131725, 0.989478125)
    #   (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
    auc_dirichlet = get_roc_auc(scores, labels)
    auc_simple = get_roc_auc(scores_simple, labels)
    auc_entropy = get_roc_auc(scores_entropy, labels)
    auc_xH = get_roc_auc(scores_xH, labels)
    return auc_dirichlet, auc_simple, auc_entropy, auc_xH
Ejemplo n.º 8
0
def test_model_loading(transformer,
                       mdl,
                       loader,
                       dataset_name='hits-4-c',
                       single_class_ind=1,
                       tf_version='tf1',
                       transformer_name='transformed',
                       model_name='resnet',
                       epochs=None):
    """Train ``mdl`` on cached transformed data, save results, return ROC AUCs.

    Both the untransformed splits and their transformed versions are cached
    as pickles under ``tests/aux_data``; each is loaded when its file exists
    and otherwise generated and saved. The model is compiled, trained to
    predict the transformation index, scored four ways, and its weights and
    scores are written under ``tests/aux_results/<dataset_name>``.

    Args:
        transformer: provides ``n_transforms`` and ``transform_batch``.
        mdl: model on which ``compile``, ``fit``, ``predict`` and
            ``save_weights`` are called.
        loader: called as ``loader(return_val=True)`` only when the
            untransformed pickle is missing; must return three ``(x, y)``
            pairs.
        dataset_name: tag used in file and folder names.
        single_class_ind: label value kept for train/val and treated as the
            positive class when building ``labels``.
        tf_version: tag embedded in file names.
        transformer_name: tag embedded in file and experiment names.
        model_name: tag embedded in experiment and weight-file names.
        epochs: training epochs; when ``None`` it becomes
            ``ceil(200 / transformer.n_transforms)``.

    Returns:
        Tuple of ``get_roc_auc`` values for ``scores``, ``scores_simple``,
        ``scores_entropy`` and ``scores_xH``, in that order.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(results_dir)
    utils.check_path(save_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))

    # load-save data: reuse the cached untransformed splits when available,
    # otherwise build them with the loader and cache them (unfiltered).
    normal_data_path = os.path.join(
        save_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    if os.path.exists(normal_data_path):
        (x_train,
         y_train), (x_val, y_val), (x_test,
                                    y_test) = pd.read_pickle(normal_data_path)
    else:
        (x_train, y_train), (x_val, y_val), (x_test,
                                             y_test) = loader(return_val=True)
        normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
        utils.save_pickle(normal_data, normal_data_path)
    # create model (the caller now supplies ``mdl``; the previous in-place
    # construction is kept below for reference)
    # n, k = (10, 4)
    # mdl = create_wide_residual_network(
    #     x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])
    # select inliers: train/val keep only samples labelled single_class_ind;
    # the test split is left untouched
    x_train = x_train[y_train.flatten() == single_class_ind]
    x_val = x_val[y_val.flatten() == single_class_ind]

    # load-save transformed data
    transformed_data_path = os.path.join(
        save_dir, '%s_data_%s_%s_loading.pkl' %
        (transformer_name, dataset_name, tf_version))
    if os.path.exists(transformed_data_path):
        (x_train_transform_tf1,
         y_train_transform_tf1), (x_val_transform_tf1, y_val_transform_tf1), (
             x_test_transform_tf1,
             y_test_transform_tf1) = pd.read_pickle(transformed_data_path)
    else:
        # transform all data: every sample is repeated n_transforms times in
        # place (np.repeat) and paired with the cycling indices
        # 0..n_transforms-1 (np.tile), so each sample gets every transform
        y_train_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                        len(x_train))
        x_train_transform_tf1 = transformer.transform_batch(
            np.repeat(x_train, transformer.n_transforms, axis=0),
            y_train_transform_tf1)
        y_val_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                      len(x_val))
        x_val_transform_tf1 = transformer.transform_batch(
            np.repeat(x_val, transformer.n_transforms, axis=0),
            y_val_transform_tf1)
        y_test_transform_tf1 = np.tile(np.arange(transformer.n_transforms),
                                       len(x_test))
        x_test_transform_tf1 = transformer.transform_batch(
            np.repeat(x_test, transformer.n_transforms, axis=0),
            y_test_transform_tf1)
        transformed_data = ((x_train_transform_tf1, y_train_transform_tf1),
                            (x_val_transform_tf1, y_val_transform_tf1),
                            (x_test_transform_tf1, y_test_transform_tf1))
        utils.save_pickle(transformed_data, transformed_data_path)
    print(x_train.shape)
    print(x_train_transform_tf1.shape)
    print(x_test.shape)
    print(x_test_transform_tf1.shape)
    # train model: the targets are the one-hot transformation indices; the
    # validation split is not passed to fit here
    batch_size = 128
    if epochs is None:
        epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x=x_train_transform_tf1,
            y=to_categorical(y_train_transform_tf1),
            batch_size=batch_size,
            epochs=epochs)
    # accumulators: one score per test sample, and one
    # (n_transforms x n_transforms) matrix of predictions per test sample
    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    x_pred_train = mdl.predict(x_train_transform_tf1, batch_size=1024)
    x_pred_test = mdl.predict(x_test_transform_tf1, batch_size=1024)
    print(x_pred_train.shape)
    print(x_pred_test.shape)
    for t_ind in range(transformer.n_transforms):
        # train predictions of the samples transformed with t_ind
        ind_x_pred_equal_to_t_ind = np.where(y_train_transform_tf1 == t_ind)[0]
        observed_dirichlet = x_pred_train[ind_x_pred_equal_to_t_ind]
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        # initial parameter guess: mean prediction scaled by the
        # approximated alpha sum
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        # test predictions of the samples transformed with t_ind
        ind_x_pred_test_equal_to_t_ind = np.where(
            y_test_transform_tf1 == t_ind)[0]
        x_test_p = x_pred_test[ind_x_pred_test_equal_to_t_ind]
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    # average over transformations, then derive the alternative scores
    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # persist each score variant under its own experiment name
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores,
                      labels=labels,
                      experiment_name='%s-%s-loading-%s' %
                      (model_name, transformer_name, tf_version))
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_simple,
                      labels=labels,
                      experiment_name='%s-%s-simple-loading-%s' %
                      (model_name, transformer_name, tf_version))
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_entropy,
                      labels=labels,
                      experiment_name='%s-%s-entropy-loading-%s' %
                      (model_name, transformer_name, tf_version))
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_xH,
                      labels=labels,
                      experiment_name='%s-%s-xH-loading-%s' %
                      (model_name, transformer_name, tf_version))
    # save the trained weights next to the results
    mdl_weights_name = '{}_{}_{}_{}_loading_{}_weights.h5'.format(
        model_name, transformer_name, dataset_name, tf_version,
        get_class_name_from_index(single_class_ind, dataset_name))
    mdl_weights_path = os.path.join(results_dir, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
    # NOTE(review): reset_weights is defined outside this snippet and is
    # called without arguments; presumably it clears global model/session
    # state - confirm against its definition.
    reset_weights()
    """
  Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c', tf_version='tf1') 00:04:31.65
  (0.992217, 0.9895665, 0.99131725, 0.989478125)
  (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
  """
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
           get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)
Ejemplo n.º 9
0
def test_model_original(transformer,
                        loader,
                        dataset_name='hits-4-c',
                        single_class_ind=1):
    """Build, train and score a wide residual network; return four ROC AUCs.

    The splits come from ``loader(return_val=True)`` and are pickled to
    ``tests/aux_data``. Training uses only the train samples labelled
    ``single_class_ind``, each expanded with every transformation, and the
    model learns to predict the transformation index. Scores and weights are
    written under ``tests/aux_results/<dataset_name>``.

    Returns:
        Tuple of ``get_roc_auc`` values for the Dirichlet, trace, entropy and
        ``get_xH`` scores, in that order.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    results_dir = os.path.join(tests_root, 'aux_results')
    save_dir = os.path.join(tests_root, 'aux_data')
    for folder in (results_dir, save_dir,
                   os.path.join(results_dir, dataset_name)):
        utils.check_path(folder)

    # Load the splits and store a copy on disk.
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
        return_val=True)
    pickle_name = 'normal_data_%s_tf1_original.pkl' % dataset_name
    utils.save_pickle(((x_train, y_train), (x_val, y_val), (x_test, y_test)),
                      os.path.join(save_dir, pickle_name))

    # Build and compile the classifier over transformation indices.
    n, k = 10, 4
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Inliers of the requested class.
    inliers = x_train[y_train.flatten() == single_class_ind]
    print(inliers.shape)

    # Apply every transformation to every inlier.
    transform_labels = np.tile(np.arange(transformer.n_transforms),
                               len(inliers))
    repeated_inliers = np.repeat(inliers, transformer.n_transforms, axis=0)
    inliers_transformed = transformer.transform_batch(repeated_inliers,
                                                      transform_labels)
    print(inliers_transformed.shape)

    # Train.
    mdl.fit(x=inliers_transformed,
            y=to_categorical(transform_labels),
            batch_size=128,
            epochs=int(np.ceil(200 / transformer.n_transforms)))

    n_test = len(x_test)
    scores = np.zeros((n_test, ))
    matrix_evals = np.zeros(
        (n_test, transformer.n_transforms, transformer.n_transforms))
    for t_ind in range(transformer.n_transforms):
        # Predictions on the inliers under transformation t_ind.
        inliers_t = transformer.transform_batch(inliers,
                                                [t_ind] * len(inliers))
        observed = mdl.predict(inliers_t, batch_size=1024)
        mean_log_p = np.log(observed).mean(axis=0)

        # Initial guess, then fixed-point refinement.
        alpha_sum = calc_approx_alpha_sum(observed)
        alpha_init = observed.mean(axis=0) * alpha_sum
        alpha_mle = fixed_point_dirichlet_mle(alpha_init, mean_log_p)

        # Predictions on the test set under the same transformation.
        test_t = transformer.transform_batch(x_test, [t_ind] * n_test)
        test_probs = mdl.predict(test_t, batch_size=1024)
        matrix_evals[:, :, t_ind] += test_probs
        scores += dirichlet_normality_score(alpha_mle, test_probs)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # Persist each score variant under its own experiment name.
    named_scores = (
        ('transformations', scores),
        ('transformations-simple', scores_simple),
        ('transformations-entropy', scores_entropy),
        ('transformations-xH', scores_xH),
    )
    for experiment_name, experiment_scores in named_scores:
        save_results_file(results_dir,
                          dataset_name,
                          single_class_ind,
                          scores=experiment_scores,
                          labels=labels,
                          experiment_name=experiment_name)

    # Save the trained weights next to the results.
    class_name = get_class_name_from_index(single_class_ind, dataset_name)
    weights_file = '{}_tf1_original_{}_weights.h5'.format(
        dataset_name, class_name)
    mdl.save_weights(os.path.join(results_dir, dataset_name, weights_file))

    # Reference results recorded by the original author:
    #   Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c') 00:06:58.37
    #   (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
    #   (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
    #   (0.9912172499999999, 0.9883357499999998, 0.9909070000000001,
    #    0.9886706249999999)
    #   #train only Time test_model_original(transformer, load_hits4c,
    #       dataset_name='hits-4-c', tf_version='tf1') 00:03:48.29
    auc_dirichlet = get_roc_auc(scores, labels)
    auc_simple = get_roc_auc(scores_simple, labels)
    auc_entropy = get_roc_auc(scores_entropy, labels)
    auc_xH = get_roc_auc(scores_xH, labels)
    return auc_dirichlet, auc_simple, auc_entropy, auc_xH