Beispiel #1
0
config_file = 'architecture_config.yaml'
with open(config_file, 'r') as fh:
    config = yaml.safe_load(fh)
config = config[DATA_SET]
anomaly_ratio = config['anomaly_ratio']
num_anomaly_sets = 5

# =======================================================================================
# On the same data, run the model multiple times , varying the number of negative samples
# =======================================================================================

for nr in range(num_runs):
    pos, neg, data_dict = model_data_fetcher.fetch_model_data(
        DATA_SET,
        num_anom_sets=num_anomaly_sets,
        anomaly_ratio=anomaly_ratio,
        num_neg_samples=neg_samples_max)
    runs_aupr = []
    for neg_samples in range(neg_samples_min,
                             neg_samples_max + neg_samples_step,
                             neg_samples_step):
        _neg = neg[:, :neg_samples, :]
        _mean_aupr, std = execute_run(DATA_SET, pos, _neg, data_dict, config)
        runs_aupr.append(_mean_aupr)

    _mean = np.mean(runs_aupr)
    LOGGER.info('Run {} : AUPR Mean : {:4f} '.format(nr + 1, _mean))
    LOGGER.info(format_arr(runs_aupr))
    results.append(runs_aupr)
# ================================
Beispiel #2
0
LOG_FILE = 'log_results_{}.txt'.format(DATA_SET)
LOGGER = utils.get_logger(LOG_FILE)
utils.log_time(LOGGER)
LOGGER.info(DATA_SET)
results = []

for n in range(1, num_runs + 1):

    config_file = 'architecture_config.yaml'
    with open(config_file, 'r') as fh:
        config = yaml.safe_load(fh)
    config = config[DATA_SET]
    num_anomaly_sets = 5
    anomaly_ratio = config['anomaly_ratio']

    pos, neg, data_dict = model_data_fetcher.fetch_model_data(
        DATA_SET, num_anom_sets=num_anomaly_sets, anomaly_ratio=anomaly_ratio)

    if num_neg_samples is not None:
        neg = neg[:, :num_neg_samples, :]

    mean_aupr1, std = execute_run(DATA_SET,
                                  pos,
                                  neg,
                                  data_dict,
                                  config,
                                  max_gamma=max_gamma,
                                  include_noise=True)

    mean_aupr2, std = execute_run(DATA_SET,
                                  pos,
                                  neg,
Beispiel #3
0
def execute_run(DATA_SET):

    global LOGGER
    encoder_structure_config, decoder_structure_config, loss_structure_config, latent_dim = utils.create_config(
        DATA_SET)
    anomaly_ratio = -1
    ae_model = None

    config_file = 'architecture_config.yaml'

    with open(config_file, 'r') as fh:
        config = yaml.safe_load(fh)
    config = config[DATA_SET]

    burn_in_epochs = config['burn_in_epochs']
    phase_2_epochs = config['phase_2_epochs']
    phase_3_epochs = config['phase_3_epochs']
    batch_size = config['batch_size']
    ae_dropout = config['ae_dropout']
    fc_dropout = config['fc_dropout']
    anomaly_ratio = config['anomaly_ratio']
    LR = config['LR']
    max_gamma = config['max_gamma']

    num_anomaly_sets = 5
    pos, neg, data_dict = model_data_fetcher.fetch_model_data(
        DATA_SET, num_anom_sets=num_anomaly_sets, anomaly_ratio=anomaly_ratio)
    data_dim = pos.shape[1]
    not_converged = True
    while not_converged:
        ae_model = Model(DATA_SET,
                         DEVICE,
                         latent_dim,
                         data_dim,
                         encoder_structure_config,
                         decoder_structure_config,
                         loss_structure_config,
                         batch_size=batch_size,
                         fc_dropout=fc_dropout,
                         ae_dropout=ae_dropout,
                         learning_rate=LR,
                         max_gamma=max_gamma,
                         burn_in_epochs=burn_in_epochs,
                         phase_2_epochs=phase_2_epochs,
                         phase_3_epochs=phase_3_epochs)

        print(ae_model.network_module)

        _, epoch_losses_phase_3 = ae_model.train_model(pos, neg)
        print(epoch_losses_phase_3)
        if epoch_losses_phase_3[-1] < epoch_losses_phase_3[0]:
            not_converged = False

    test_norm_X = data_dict['test']
    auc_list = []
    ae_model.mode = 'test'

    def _normalize_(val, _min, _max):
        return (val - _min) / (_max - _min)

    for idx in range(1, num_anomaly_sets + 1):
        key = 'anom_' + str(idx)
        test_anom_df = data_dict[key]
        test_anom_X = test_anom_df.values
        x1 = test_norm_X
        x2 = test_anom_X

        x1_scores = ae_model.get_score(x1)
        x2_scores = ae_model.get_score(x2)

        res_data = []
        labels = [1 for _ in range(x1.shape[0])
                  ] + [0 for _ in range(x2.shape[0])]
        _scores = np.concatenate([x1_scores, x2_scores], axis=0)

        for i, j in zip(_scores, labels):
            res_data.append((i[0], j))

        res_df = pd.DataFrame(res_data, columns=['score', 'label'])
        res_df = res_df.sort_values(by=['score'], ascending=True)

        _max = max(res_df['score'])
        _min = min(res_df['score'])

        res_df['score'] = res_df['score'].parallel_apply(_normalize_,
                                                         args=(
                                                             _min,
                                                             _max,
                                                         ))
        _max = max(res_df['score'])
        _min = min(res_df['score'])

        step = (_max - _min) / 100

        # Vary the threshold
        thresh = _min + step
        thresh = round(thresh, 3)
        num_anomalies = x2.shape[0]
        print('Num anomalies', num_anomalies)
        P = []
        R = [0]

        while thresh <= _max + step:

            sel = res_df.loc[res_df['score'] <= thresh]
            if len(sel) == 0:
                thresh += step
                continue

            correct = sel.loc[sel['label'] == 0]
            prec = len(correct) / len(sel)
            rec = len(correct) / num_anomalies
            P.append(prec)
            R.append(rec)
            thresh += step
            thresh = round(thresh, 3)

        P = [P[0]] + P
        pr_auc = auc(R, P)
        try:
            plt.figure(figsize=[8, 6])
            plt.plot(R, P)
            plt.title('Precision Recall Curve  || auPR :' +
                      "{:0.4f}".format(pr_auc),
                      fontsize=15)
            plt.xlabel('Recall', fontsize=15)
            plt.ylabel('Precision', fontsize=15)
            plt.show()
        except:
            pass
        print("AUC : {:0.4f} ".format(pr_auc))
        auc_list.append(pr_auc)

    _mean = np.mean(auc_list)
    _std = np.std(auc_list)
    print(' Mean AUC {:0.4f} '.format(_mean))
    print(' AUC std {:0.4f} '.format(_std))
    return _mean, _std