Example 1
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = "noise" in config.dataset
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename,
        test_filename,
        label_filename,
        normalize=True,
        file_logger=file_logger,
        negative_sample=negative_sample,
        ratio=ratio)
    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning(
            "test data is shorter than rolling_size! Ignoring the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN,
                                       FP=FP,
                                       FN=FN,
                                       TP=TP,
                                       precision=precision,
                                       recall=recall,
                                       fbeta=f1,
                                       pr_auc=pr_auc,
                                       roc_auc=roc_auc)
        return metrics_result

    original_x_dim = abnormal_data.shape[1]

    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        if config.use_overlapping:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    train_data, config.rolling_size), rolling_window_2D(
                        abnormal_data, config.rolling_size), rolling_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    abnormal_data, config.rolling_size), rolling_window_2D(
                        abnormal_label, config.rolling_size)
        else:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    train_data, config.rolling_size), cutting_window_2D(
                        abnormal_data, config.rolling_size), cutting_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    abnormal_data, config.rolling_size), cutting_window_2D(
                        abnormal_label, config.rolling_size)
    else:
        if train_data is not None:
            rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = np.expand_dims(
                train_data,
                axis=0), np.expand_dims(abnormal_data,
                                        axis=0), np.expand_dims(abnormal_label,
                                                                axis=0)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                train_split_idx:]
        else:
            rolling_abnormal_data, rolling_abnormal_label = np.expand_dims(
                abnormal_data, axis=0), np.expand_dims(abnormal_label, axis=0)

    config.x_dim = rolling_abnormal_data.shape[2]

    model = DONUT(file_name=train_filename, config=config)
    model = model.to(device)
    donut_output = None
    if train_data is not None and not config.robustness:
        donut_output = model.fit(train_input=rolling_train_data,
                                 train_label=rolling_train_data,
                                 valid_input=rolling_valid_data,
                                 valid_label=rolling_valid_data,
                                 test_input=rolling_abnormal_data,
                                 test_label=rolling_abnormal_label,
                                 abnormal_data=abnormal_data,
                                 abnormal_label=abnormal_label,
                                 original_x_dim=original_x_dim)
    elif train_data is None or config.robustness:
        donut_output = model.fit(train_input=rolling_abnormal_data,
                                 train_label=rolling_abnormal_data,
                                 valid_input=rolling_valid_data,
                                 valid_label=rolling_valid_data,
                                 test_input=rolling_abnormal_data,
                                 test_label=rolling_abnormal_label,
                                 abnormal_data=abnormal_data,
                                 abnormal_label=abnormal_label,
                                 original_x_dim=original_x_dim)
    # %%
    min_max_scaler = preprocessing.MinMaxScaler()
    if config.preprocessing:
        if config.use_overlapping:
            if config.use_last_point:
                dec_mean_unroll = np.reshape(
                    donut_output.dec_means.detach().cpu().numpy(),
                    (-1, config.rolling_size, original_x_dim))[:, -1]
                latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[config.rolling_size - 1:]
            else:
                dec_mean_unroll = unroll_window_3D(
                    np.reshape(
                        donut_output.dec_means.detach().cpu().numpy(),
                        (-1, config.rolling_size, original_x_dim)))[::-1]
                latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]

        else:
            dec_mean_unroll = np.reshape(
                donut_output.dec_means.detach().cpu().numpy(),
                (-1, original_x_dim))
            latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
    else:
        dec_mean_unroll = donut_output.dec_means.detach().cpu().numpy()
        latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
        dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
        x_original_unroll = abnormal_data

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save(
            './outputs/NPY/{}/Dec_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.npy'.
            format(config.dataset, config.h_dim, config.rolling_size,
                   train_filename.stem, config.pid), dec_mean_unroll)
        np.save(
            './outputs/NPY/{}/Latent_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.npy'
            .format(config.dataset, config.h_dim, config.rolling_size,
                    train_filename.stem, config.pid), latent_mean_unroll)

    error = np.sum(x_original_unroll -
                   np.reshape(dec_mean_unroll, [-1, original_x_dim]),
                   axis=1)**2
    # final_zscore = zscore(error)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    #np_decision = create_label_based_on_quantile(error, quantile=99)
    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error,
                                       Tmax=SD_Tmax,
                                       use_max=True,
                                       use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error,
                                        Tmax=MAD_Tmax,
                                        use_max=True,
                                        use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error,
                                        Tmax=IQR_Tmax,
                                        use_max=True,
                                        use_min=False)
    np_decision = {}
    np_decision["SD"] = SD_y_hat
    np_decision["MAD"] = MAD_y_hat
    np_decision["IQR"] = IQR_y_hat

    # TODO metrics computation.

    # %%
    if config.save_figure:
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(x_original_unroll, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig(
                './figures/{}/Ori_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.png'.
                format(config.dataset, config.h_dim, config.rolling_size,
                       train_filename.stem, config.pid),
                dpi=600)
            plt.close()

            # Plot decoder output
            plt.figure(figsize=(9, 3))
            plt.plot(dec_mean_unroll, color='blue', lw=1.5)
            plt.title('Decoding Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig(
                './figures/{}/Dec_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.png'.
                format(config.dataset, config.h_dim, config.rolling_size,
                       train_filename.stem, config.pid),
                dpi=600)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            # markercolors = ['blue' if i == 1 else 'red' for i in abnormal_label[: dec_mean_unroll.shape[0]]]
            # markersize = [4 if i == 1 else 25 for i in abnormal_label[: dec_mean_unroll.shape[0]]]
            # plt.figure(figsize=(9, 3))
            # ax = plt.axes()
            # plt.yticks([0, 0.25, 0.5, 0.75, 1])
            # ax.set_xlim(t[0] - 10, t[-1] + 10)
            # ax.set_ylim(-0.10, 1.10)
            # plt.xlabel('$t$')
            # plt.ylabel('$s$')
            # plt.grid(True)
            # plt.tight_layout()
            # plt.margins(0.1)
            # plt.plot(abnormal_data[: dec_mean_unroll.shape[0]], alpha=0.7)
            # plt.scatter(t[: dec_mean_unroll.shape[0]], x_original_unroll[: np_decision.shape[0]], s=markersize, c=markercolors)
            # # plt.show()
            # plt.savefig('./figures/{}/VisInp_DONUT_{}_pid={}.png'.format(config.dataset, Path(file_name).stem, config.pid), dpi=600)
            # plt.close()

            markercolors = ['blue' for i in range(config.rolling_size - 1)] + [
                'blue' if i == 1 else 'red' for i in np_decision["SD"]
            ]
            markersize = [4 for i in range(config.rolling_size - 1)
                          ] + [4 if i == 1 else 25 for i in np_decision["SD"]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_DONUT_hdim_{}_rollingsize_{}_SD_{}_pid={}.png'
                .format(config.dataset, config.h_dim, config.rolling_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = ['blue' for i in range(config.rolling_size - 1)] + [
                'blue' if i == 1 else 'red' for i in np_decision["MAD"]
            ]
            markersize = [4 for i in range(config.rolling_size - 1)] + [
                4 if i == 1 else 25 for i in np_decision["MAD"]
            ]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_DONUT_hdim_{}_rollingsize_{}_MAD_{}_pid={}.png'
                .format(config.dataset, config.h_dim, config.rolling_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = ['blue' for i in range(config.rolling_size - 1)] + [
                'blue' if i == 1 else 'red' for i in np_decision["IQR"]
            ]
            markersize = [4 for i in range(config.rolling_size - 1)] + [
                4 if i == 1 else 25 for i in np_decision["IQR"]
            ]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_DONUT_hdim_{}_rollingsize_{}_IQR_{}_pid={}.png'
                .format(config.dataset, config.h_dim, config.rolling_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label,
                                  y_pred=np_decision[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            recall[threshold_method] = recall_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            f1[threshold_method] = f1_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)

        fpr, tpr, _ = roc_curve(y_true=abnormal_label,
                                y_score=np.nan_to_num(error),
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label,
                                            probas_pred=np.nan_to_num(error),
                                            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(
            TN=TN,
            FP=FP,
            FN=FN,
            TP=TP,
            precision=precision,
            recall=recall,
            fbeta=f1,
            pr_auc=pr_auc,
            roc_auc=roc_auc,
            best_TN=donut_output.best_TN,
            best_FP=donut_output.best_FP,
            best_FN=donut_output.best_FN,
            best_TP=donut_output.best_TP,
            best_precision=donut_output.best_precision,
            best_recall=donut_output.best_recall,
            best_fbeta=donut_output.best_fbeta,
            best_pr_auc=donut_output.best_pr_auc,
            best_roc_auc=donut_output.best_roc_auc,
            best_cks=donut_output.best_cks)
        return metrics_result
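The SD/MAD/IQR auto-thresholding helpers and get_labels_by_threshold are repository utilities that are not reproduced in these examples. A minimal sketch of what they are assumed to do is given below (3-sigma, 3-MAD, and 1.5-IQR fences, with the {1: normal, -1: anomaly} label convention implied by pos_label=-1 in the metrics code); the real implementations may differ.

import numpy as np

def SD_autothreshold(score, k=3.0):
    # mean +/- k standard deviations
    mu, sd = np.mean(score), np.std(score)
    return mu - k * sd, mu + k * sd  # (Tmin, Tmax)

def MAD_autothreshold(score, k=3.0):
    # median +/- k scaled median absolute deviations
    med = np.median(score)
    mad = 1.4826 * np.median(np.abs(score - med))
    return med - k * mad, med + k * mad

def IQR_autothreshold(score, k=1.5):
    # Tukey fences around the interquartile range
    q1, q3 = np.percentile(score, [25, 75])
    return q1 - k * (q3 - q1), q3 + k * (q3 - q1)

def get_labels_by_threshold(score, Tmax=None, Tmin=None, use_max=True, use_min=False):
    # -1 marks an anomaly, 1 marks a normal point
    anomalous = np.zeros(len(score), dtype=bool)
    if use_max:
        anomalous |= score > Tmax
    if use_min:
        anomalous |= score < Tmin
    return np.where(anomalous, -1, 1)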
Example 2
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = "noise" in config.dataset
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename,
        test_filename,
        label_filename,
        normalize=True,
        file_logger=file_logger,
        negative_sample=negative_sample,
        ratio=ratio)

    original_x_dim = abnormal_data.shape[1]

    config.x_dim = abnormal_data.shape[1]

    model = LOF(train_filename, config)
    lof_output = model.fit(train_input=abnormal_data,
                           train_label=abnormal_label,
                           test_input=abnormal_data,
                           test_label=abnormal_label)

    SD_Tmin, SD_Tmax = SD_autothreshold(-lof_output.negative_factor)
    SD_y_hat = get_labels_by_threshold(-lof_output.negative_factor,
                                       Tmax=SD_Tmax,
                                       use_max=True,
                                       use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(-lof_output.negative_factor)
    MAD_y_hat = get_labels_by_threshold(-lof_output.negative_factor,
                                        Tmax=MAD_Tmax,
                                        use_max=True,
                                        use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(-lof_output.negative_factor)
    IQR_y_hat = get_labels_by_threshold(-lof_output.negative_factor,
                                        Tmax=IQR_Tmax,
                                        use_max=True,
                                        use_min=False)
    lof_output.y_hat = {}
    lof_output.y_hat["SD"] = SD_y_hat
    lof_output.y_hat["MAD"] = MAD_y_hat
    lof_output.y_hat["IQR"] = IQR_y_hat

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save(
            './outputs/NPY/{}/Score_LOF_hdim_{}_rollingsize_{}_{}_pid={}.npy'.
            format(config.dataset, config.n_neighbors, 1, train_filename.stem,
                   config.pid), lof_output.negative_factor)
        np.save(
            './outputs/NPY/{}/Pred_LOF_hdim_{}_rollingsize_{}_{}_pid={}.npy'.
            format(config.dataset, config.n_neighbors, 1, train_filename.stem,
                   config.pid), lof_output.y_hat)

    # %%
    if config.save_figure:
        if not os.path.exists('./figures/{}/'.format(config.dataset)):
            os.makedirs('./figures/{}/'.format(config.dataset))
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(abnormal_data, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig(
                './figures/{}/Ori_LOF_hdim_{}_rollingsize_{}_{}_pid={}.png'.
                format(config.dataset, config.n_neighbors, 1,
                       train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            markercolors = [
                'blue' if i == 1 else 'red' for i in abnormal_label
            ]
            markersize = [4 if i == 1 else 25 for i in abnormal_label]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisInp_LOF_hdim_{}_rollingsize_{}_{}_pid={}.png'.
                format(config.dataset, config.n_neighbors, 1,
                       train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in lof_output.y_hat["SD"]
            ]
            markersize = [4 if i == 1 else 25 for i in lof_output.y_hat["SD"]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_LOF_hdim_{}_rollingsize_{}_SD_{}_pid={}.png'
                .format(config.dataset, config.n_neighbors, 1,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in lof_output.y_hat["MAD"]
            ]
            markersize = [4 if i == 1 else 25 for i in lof_output.y_hat["MAD"]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_LOF_hdim_{}_rollingsize_{}_MAD_{}_pid={}.png'
                .format(config.dataset, config.n_neighbors, 1,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in lof_output.y_hat["IQR"]
            ]
            markersize = [4 if i == 1 else 25 for i in lof_output.y_hat["IQR"]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            plt.savefig(
                './figures/{}/VisOut_LOF_hdim_{}_rollingsize_{}_IQR_{}_pid={}.png'
                .format(config.dataset, config.n_neighbors, 1,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in lof_output.y_hat:
            cm = confusion_matrix(y_true=abnormal_label,
                                  y_pred=lof_output.y_hat[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(
                y_true=abnormal_label,
                y_pred=lof_output.y_hat[threshold_method],
                pos_label=pos_label)
            recall[threshold_method] = recall_score(
                y_true=abnormal_label,
                y_pred=lof_output.y_hat[threshold_method],
                pos_label=pos_label)
            f1[threshold_method] = f1_score(
                y_true=abnormal_label,
                y_pred=lof_output.y_hat[threshold_method],
                pos_label=pos_label)

        fpr, tpr, _ = roc_curve(y_true=abnormal_label,
                                y_score=-lof_output.negative_factor,
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(
            y_true=abnormal_label,
            probas_pred=-lof_output.negative_factor,
            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(TN=TN,
                                       FP=FP,
                                       FN=FN,
                                       TP=TP,
                                       precision=precision,
                                       recall=recall,
                                       fbeta=f1,
                                       pr_auc=pr_auc,
                                       roc_auc=roc_auc)
        return metrics_result
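The LOF wrapper used above comes from the repository; a minimal sketch of what its fit step is assumed to do, based on scikit-learn's LocalOutlierFactor, is shown below. negative_factor follows sklearn's convention (more negative means more abnormal), which is why the example negates it before auto-thresholding.

from sklearn.neighbors import LocalOutlierFactor

def lof_negative_factor(test_input, n_neighbors=20):
    # fit on the test series itself and return the per-point factor
    clf = LocalOutlierFactor(n_neighbors=n_neighbors)
    clf.fit(test_input)
    return clf.negative_outlier_factor_  # shape: (n_samples,)

# usage mirroring RunModel: score = -lof_negative_factor(abnormal_data, config.n_neighbors)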
Example 3
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = "noise" in config.dataset
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename,
        test_filename,
        label_filename,
        normalize=True,
        file_logger=file_logger,
        negative_sample=negative_sample,
        ratio=ratio)
    original_x_dim = abnormal_data.shape[1]
    config.x_dim = abnormal_data.shape[1]

    Pab = []
    for i in range(abnormal_data.shape[1]):
        ts = abnormal_data[:, i]
        Pab_i, _ = stomp(ts, config.pattern_size)
        Pab.append(np.nan_to_num(Pab_i))
    Pab = np.sum(Pab, axis=0)
    # final_zscore = zscore(Pab)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    #np_decision = create_label_based_on_quantile(-Pab, quantile=99)

    # A higher -Pab value is more likely to be an anomaly.
    SD_Tmin, SD_Tmax = SD_autothreshold(-Pab)
    SD_y_hat = get_labels_by_threshold(-Pab,
                                       Tmax=SD_Tmax,
                                       use_max=True,
                                       use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(-Pab)
    MAD_y_hat = get_labels_by_threshold(-Pab,
                                        Tmax=MAD_Tmax,
                                        use_max=True,
                                        use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(-Pab)
    IQR_y_hat = get_labels_by_threshold(-Pab,
                                        Tmax=IQR_Tmax,
                                        use_max=True,
                                        use_min=False)
    np_decision = {}
    np_decision["SD"] = SD_y_hat
    np_decision["MAD"] = MAD_y_hat
    np_decision["IQR"] = IQR_y_hat

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save(
            './outputs/NPY/{}/MP_hdim_None_rollingsize_{}_{}_pid={}.npy'.
            format(config.dataset, config.pattern_size, train_filename.stem,
                   config.pid), Pab)

    # TODO metrics computation.

    # %%
    if config.save_figure:
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(ts, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig(
                './figures/{}/Ori_MP_hdim_None_rollingsize_{}_{}_pid={}.png'.
                format(config.dataset, config.pattern_size,
                       train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            # Plot decoder output
            plt.figure(figsize=(9, 3))
            plt.plot(Pab, color='blue', lw=1.5)
            plt.title('Profile Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig(
                './figures/{}/Profile_MP_hdim_None_rollingsize_{}_{}_pid={}.png'
                .format(config.dataset, config.pattern_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            markercolors = ['blue' for i in range(config.pattern_size - 1)] + [
                'blue' if i == 1 else 'red'
                for i in abnormal_label[config.pattern_size - 1:]
            ]
            markersize = [4 for i in range(config.pattern_size - 1)] + [
                4 if i == 1 else 25
                for i in abnormal_label[config.pattern_size - 1:]
            ]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(np.squeeze(abnormal_data), alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisInp_MP_hdim_None_rollingsize_{}_{}_pid={}.png'
                .format(config.dataset, config.pattern_size,
                        train_filename.stem, config.pid),
                dpi=600)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in np_decision["SD"]
            ] + ['blue' for i in range(config.pattern_size - 1)]
            markersize = [4 if i == 1 else 25 for i in np_decision["SD"]
                          ] + [4 for i in range(config.pattern_size - 1)]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(np.squeeze(abnormal_data), alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_MP_hdim_None_rollingsize_{}_SD_{}_pid={}.png'
                .format(config.dataset, config.pattern_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in np_decision["MAD"]
            ] + ['blue' for i in range(config.pattern_size - 1)]
            markersize = [4 if i == 1 else 25 for i in np_decision["MAD"]
                          ] + [4 for i in range(config.pattern_size - 1)]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(np.squeeze(abnormal_data), alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_MP_hdim_None_rollingsize_{}_MAD_{}_pid={}.png'
                .format(config.dataset, config.pattern_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()

            markercolors = [
                'blue' if i == 1 else 'red' for i in np_decision["IQR"]
            ] + ['blue' for i in range(config.pattern_size - 1)]
            markersize = [4 if i == 1 else 25 for i in np_decision["IQR"]
                          ] + [4 for i in range(config.pattern_size - 1)]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(np.squeeze(abnormal_data), alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig(
                './figures/{}/VisOut_MP_hdim_None_rollingsize_{}_IQR_{}_pid={}.png'
                .format(config.dataset, config.pattern_size,
                        train_filename.stem, config.pid),
                dpi=300)
            plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:

        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label[config.pattern_size -
                                                        1:],
                                  y_pred=np_decision[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            recall[threshold_method] = recall_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            f1[threshold_method] = f1_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)

        fpr, tpr, _ = roc_curve(y_true=abnormal_label[config.pattern_size -
                                                      1:],
                                y_score=-Pab,
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(
            y_true=abnormal_label[config.pattern_size - 1:],
            probas_pred=-Pab,
            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(TN=TN,
                                       FP=FP,
                                       FN=FN,
                                       TP=TP,
                                       precision=precision,
                                       recall=recall,
                                       fbeta=f1,
                                       pr_auc=pr_auc,
                                       roc_auc=roc_auc)
        return metrics_result
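The per-threshold metric block is repeated almost verbatim in every example. Below is a minimal sketch of how it could be factored into a helper; the function name is hypothetical, and it only assumes the conventions already visible above (anomaly label -1, normal label 1, confusion matrix computed with labels=[1, -1]).

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

def per_threshold_metrics(y_true, np_decision, pos_label=-1):
    # np_decision maps a threshold method ("SD", "MAD", "IQR") to its label vector
    TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
    for method, y_pred in np_decision.items():
        cm = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[1, -1])
        TN[method], FP[method] = cm[0][0], cm[0][1]
        FN[method], TP[method] = cm[1][0], cm[1][1]
        precision[method] = precision_score(y_true, y_pred, pos_label=pos_label)
        recall[method] = recall_score(y_true, y_pred, pos_label=pos_label)
        f1[method] = f1_score(y_true, y_pred, pos_label=pos_label)
    return TN, FP, FN, TP, precision, recall, f1

# for this matrix-profile example, y_true would be abnormal_label[config.pattern_size - 1:]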
Example 4
def RunModel(train_filename, test_filename, label_filename, config, ratio,
             gpu_id):
    negative_sample = "noise" in config.dataset
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename,
        test_filename,
        label_filename,
        normalize=True,
        file_logger=file_logger,
        negative_sample=negative_sample,
        ratio=ratio)
    original_x_dim = abnormal_data.shape[1]

    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning(
            "test data is shorter than rolling_size! Ignoring the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN,
                                       FP=FP,
                                       FN=FN,
                                       TP=TP,
                                       precision=precision,
                                       recall=recall,
                                       fbeta=f1,
                                       pr_auc=pr_auc,
                                       roc_auc=roc_auc)
        return metrics_result

    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        if config.use_overlapping:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    train_data, config.rolling_size), rolling_window_2D(
                        abnormal_data, config.rolling_size), rolling_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    abnormal_data, config.rolling_size), rolling_window_2D(
                        abnormal_label, config.rolling_size)
        else:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    train_data, config.rolling_size), cutting_window_2D(
                        abnormal_data, config.rolling_size), cutting_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    abnormal_data, config.rolling_size), cutting_window_2D(
                        abnormal_label, config.rolling_size)
        if train_data is not None:
            # flatten each (rolling_size, x_dim) window into a single feature vector
            n_flat = rolling_train_data.shape[1] * rolling_train_data.shape[2]
            rolling_train_data = np.reshape(rolling_train_data, [-1, n_flat])
            rolling_valid_data = np.reshape(rolling_valid_data, [-1, n_flat])
            rolling_abnormal_data = np.reshape(rolling_abnormal_data, [-1, n_flat])
            rolling_abnormal_label = np.reshape(rolling_abnormal_label, [-1, n_flat])
        else:
            rolling_abnormal_data, rolling_abnormal_label = np.reshape(
                rolling_abnormal_data, [
                    rolling_abnormal_data.shape[0],
                    rolling_abnormal_data.shape[1] *
                    rolling_abnormal_data.shape[2]
                ]), np.reshape(rolling_abnormal_label, [
                    rolling_abnormal_label.shape[0],
                    rolling_abnormal_label.shape[1] *
                    rolling_abnormal_label.shape[2]
                ])
    else:
        if train_data is not None:
            rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = train_data, abnormal_data, abnormal_label
        else:
            rolling_abnormal_data, rolling_abnormal_label = abnormal_data, abnormal_label

    config.x_dim = rolling_abnormal_data.shape[1]

    ensemble_error = []
    ensemble_output = []
    ensemble_TN = []
    ensemble_TP = []
    ensemble_FN = []
    ensemble_FP = []
    ensemble_PRECISION = []
    ensemble_RECALL = []
    ensemble_FBETA = []
    ensemble_PR_AUC = []
    ensemble_ROC_AUC = []
    ensemble_CKS = []
    # only to show the results
    for i in range(config.ensemble_space):
        train_logger.info('component #{}'.format(i))
        model = RN(file_name=train_filename, config=config, gpu_id=gpu_id)
        model = model.to(device)
        if train_data is not None and not config.robustness:
            rn_output = model.fit(train_input=rolling_train_data,
                                  train_label=rolling_train_data,
                                  valid_input=rolling_valid_data,
                                  valid_label=rolling_valid_data,
                                  test_input=rolling_abnormal_data,
                                  test_label=rolling_abnormal_label,
                                  abnormal_data=abnormal_data,
                                  abnormal_label=abnormal_label,
                                  original_x_dim=original_x_dim)
        elif train_data is None or config.robustness:
            rn_output = model.fit(train_input=rolling_abnormal_data,
                                  train_label=rolling_abnormal_data,
                                  valid_input=rolling_valid_data,
                                  valid_label=rolling_valid_data,
                                  test_input=rolling_abnormal_data,
                                  test_label=rolling_abnormal_label,
                                  abnormal_data=abnormal_data,
                                  abnormal_label=abnormal_label,
                                  original_x_dim=original_x_dim)

        # %%
        min_max_scaler = preprocessing.MinMaxScaler()
        if config.preprocessing:
            if config.use_overlapping:
                if config.use_last_point:
                    dec_mean_unroll = np.reshape(
                        rn_output.dec_means.detach().cpu().numpy(),
                        (-1, config.rolling_size, original_x_dim))[:, -1]
                    dec_mean_unroll = min_max_scaler.fit_transform(
                        dec_mean_unroll)
                    x_original_unroll = abnormal_data[config.rolling_size - 1:]
                else:
                    dec_mean_unroll = unroll_window_3D(
                        np.reshape(
                            rn_output.dec_means.detach().cpu().numpy(),
                            (-1, config.rolling_size, original_x_dim)))[::-1]
                    dec_mean_unroll = min_max_scaler.fit_transform(
                        dec_mean_unroll)
                    x_original_unroll = abnormal_data[:dec_mean_unroll.
                                                      shape[0]]

            else:
                dec_mean_unroll = np.reshape(
                    rn_output.dec_means.detach().cpu().numpy(),
                    (-1, original_x_dim))
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
        else:
            dec_mean_unroll = rn_output.dec_means.detach().cpu().numpy()
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data

        if config.save_output:
            np.save(
                './save_outputs/NPY/{}/Dec_RN_{}_{}_pid={}.npy'.format(
                    config.dataset,
                    Path(train_filename).stem, i, config.pid), dec_mean_unroll)

        error = np.sum(abnormal_data[:dec_mean_unroll.shape[0]] -
                       np.reshape(dec_mean_unroll, [-1, original_x_dim]),
                       axis=1)**2
        ensemble_error.append(error)
        ensemble_output.append(dec_mean_unroll)
        ensemble_FN.append(rn_output.best_FN)
        ensemble_TN.append(rn_output.best_TN)
        ensemble_FP.append(rn_output.best_FP)
        ensemble_TP.append(rn_output.best_TP)
        ensemble_PRECISION.append(rn_output.best_precision)
        ensemble_RECALL.append(rn_output.best_recall)
        ensemble_FBETA.append(rn_output.best_fbeta)
        ensemble_PR_AUC.append(rn_output.best_pr_auc)
        ensemble_ROC_AUC.append(rn_output.best_roc_auc)
        ensemble_CKS.append(rn_output.best_cks)

    error = np.stack(ensemble_error, axis=0)
    error = np.median(error, axis=0)
    dec_mean_unroll = np.stack(ensemble_output, axis=0)
    dec_mean_unroll = np.median(dec_mean_unroll, axis=0)
    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error,
                                       Tmax=SD_Tmax,
                                       use_max=True,
                                       use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error,
                                        Tmax=MAD_Tmax,
                                        use_max=True,
                                        use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error,
                                        Tmax=IQR_Tmax,
                                        use_max=True,
                                        use_min=False)
    np_decision = {}
    np_decision["SD"] = SD_y_hat
    np_decision["MAD"] = MAD_y_hat
    np_decision["IQR"] = IQR_y_hat

    # TODO metrics computation.

    # TODO save output
    if config.save_output:
        np.save(
            './save_outputs/NPY/{}/Dec_RN_{}_pid={}.npy'.format(
                config.dataset,
                Path(train_filename).stem, config.pid), dec_mean_unroll)
        np.save(
            './save_outputs/NPY/{}/Error_RN_{}_pid={}.npy'.format(
                config.dataset,
                Path(train_filename).stem, config.pid), error)

    # %%
    if config.save_figure:
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(x_original_unroll, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./save_figures/{}/Ori_RN_{}_pid={}.png'.format(
                config.dataset,
                Path(train_filename).stem, config.pid),
                        dpi=300)
            plt.close()

            # Plot decoder output
            plt.figure(figsize=(9, 3))
            plt.plot(dec_mean_unroll, color='blue', lw=1.5)
            plt.title('Decoding Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./save_figures/{}/Dec_RN_{}_pid={}.png'.format(
                config.dataset,
                Path(train_filename).stem, config.pid),
                        dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            markercolors = [
                'blue' if i == 1 else 'red'
                for i in abnormal_label[:dec_mean_unroll.shape[0]]
            ]
            markersize = [
                4 if i == 1 else 25
                for i in abnormal_label[:dec_mean_unroll.shape[0]]
            ]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data[:dec_mean_unroll.shape[0]], alpha=0.7)
            plt.scatter(t[:dec_mean_unroll.shape[0]],
                        x_original_unroll[:dec_mean_unroll.shape[0]],
                        s=markersize,
                        c=markercolors)
            # plt.show()
            plt.savefig('./save_figures/{}/VisInp_RN_{}_pid={}.png'.format(
                config.dataset,
                Path(train_filename).stem, config.pid),
                        dpi=300)
            plt.close()

            # visualize the SD-thresholded decisions
            markercolors = ['blue' for i in range(config.rolling_size - 1)] + [
                'blue' if i == 1 else 'red' for i in np_decision["SD"]
            ]
            markersize = [4 for i in range(config.rolling_size - 1)
                          ] + [4 if i == 1 else 25 for i in np_decision["SD"]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig('./save_figures/{}/VisOut_RN_{}_pid={}.png'.format(
                config.dataset,
                Path(train_filename).stem, config.pid),
                        dpi=300)
            plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        try:
            pos_label = -1
            TN, FP, FN, TP, precision, recall, f1, cks = {}, {}, {}, {}, {}, {}, {}, {}
            for threshold_method in np_decision:
                cm = confusion_matrix(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method],
                    labels=[1, -1])
                TN[threshold_method] = cm[0][0]
                FP[threshold_method] = cm[0][1]
                FN[threshold_method] = cm[1][0]
                TP[threshold_method] = cm[1][1]
                precision[threshold_method] = precision_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method],
                    pos_label=pos_label)
                recall[threshold_method] = recall_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method],
                    pos_label=pos_label)
                f1[threshold_method] = f1_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method],
                    pos_label=pos_label)
                cks[threshold_method] = cohen_kappa_score(
                    y1=abnormal_label[config.rolling_size - 1:],
                    y2=np_decision[threshold_method])

            fpr, tpr, _ = roc_curve(y_true=abnormal_label[config.rolling_size -
                                                          1:],
                                    y_score=np.nan_to_num(error),
                                    pos_label=pos_label)
            roc_auc = auc(fpr, tpr)
            pre, re, _ = precision_recall_curve(
                y_true=abnormal_label[config.rolling_size - 1:],
                probas_pred=np.nan_to_num(error),
                pos_label=pos_label)
            pr_auc = auc(re, pre)

            metrics_result = MetricsResult(
                TN=TN,
                FP=FP,
                FN=FN,
                TP=TP,
                precision=precision,
                recall=recall,
                fbeta=f1,
                pr_auc=pr_auc,
                roc_auc=roc_auc,
                best_TN=rn_output.best_TN,
                best_FP=rn_output.best_FP,
                best_FN=rn_output.best_FN,
                best_TP=rn_output.best_TP,
                best_precision=rn_output.best_precision,
                best_recall=rn_output.best_recall,
                best_fbeta=rn_output.best_fbeta,
                best_pr_auc=rn_output.best_pr_auc,
                best_roc_auc=rn_output.best_roc_auc,
                best_cks=rn_output.best_cks,
                min_valid_loss=rn_output.min_valid_loss,
                testing_time=rn_output.testing_time,
                training_time=rn_output.training_time,
                memory_usage_nvidia=rn_output.memory_usage_nvidia)
            return metrics_result
        except Exception as e:
            train_logger.warning("metrics computation failed: {}".format(e))
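rolling_window_2D, cutting_window_2D, and unroll_window_3D are repository helpers that are not reproduced here. A minimal sketch of the overlapping-window case they are assumed to implement, together with the flattening this ensemble example applies before the fully connected RN model, is shown below; shapes follow (T, D) -> (T - w + 1, w, D) -> (T - w + 1, w * D).

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def rolling_window_2d_sketch(x, rolling_size):
    # x: (T, D) series; one window per time step, stride 1
    windows = sliding_window_view(x, rolling_size, axis=0)  # (T - w + 1, D, w)
    return windows.transpose(0, 2, 1)                       # (T - w + 1, w, D)

def flatten_windows(windows):
    # collapse each (w, D) window into a single feature vector for the RN model
    return windows.reshape(windows.shape[0], -1)            # (T - w + 1, w * D)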
Example 5
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = "noise" in config.dataset
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename,
        test_filename,
        label_filename,
        normalize=True,
        file_logger=file_logger,
        negative_sample=negative_sample,
        ratio=ratio)

    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning(
            "test data is shorter than rolling_size! Ignoring the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN,
                                       FP=FP,
                                       FN=FN,
                                       TP=TP,
                                       precision=precision,
                                       recall=recall,
                                       fbeta=f1,
                                       pr_auc=pr_auc,
                                       roc_auc=roc_auc)
        return metrics_result

    original_x_dim = abnormal_data.shape[1]

    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        if config.use_overlapping:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    train_data, config.rolling_size), rolling_window_2D(
                        abnormal_data, config.rolling_size), rolling_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = rolling_window_2D(
                    abnormal_data, config.rolling_size), rolling_window_2D(
                        abnormal_label, config.rolling_size)
        else:
            if train_data is not None:
                rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    train_data, config.rolling_size), cutting_window_2D(
                        abnormal_data, config.rolling_size), cutting_window_2D(
                            abnormal_label, config.rolling_size)
                train_split_idx = int(rolling_train_data.shape[0] * 0.7)
                rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                    train_split_idx:]
            else:
                rolling_abnormal_data, rolling_abnormal_label = cutting_window_2D(
                    abnormal_data, config.rolling_size), cutting_window_2D(
                        abnormal_label, config.rolling_size)
    else:
        if train_data is not None:
            rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = np.expand_dims(
                train_data,
                axis=0), np.expand_dims(abnormal_data,
                                        axis=0), np.expand_dims(abnormal_label,
                                                                axis=0)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = rolling_train_data[:train_split_idx], rolling_train_data[
                train_split_idx:]
        else:
            rolling_abnormal_data, rolling_abnormal_label = np.expand_dims(
                abnormal_data, axis=0), np.expand_dims(abnormal_label, axis=0)

    config.x_dim = rolling_abnormal_data.shape[1]

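    # Fit the autoencoder: reconstruct the clean training windows when they are available,
    # otherwise (no training split, or robustness experiments) fit directly on the test windows.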
    model = CAE(file_name=train_filename, config=config)
    model = model.to(device)
    cae_output = None
    if train_data is not None and not config.robustness:
        cae_output = model.fit(train_input=rolling_train_data,
                               train_label=rolling_train_data,
                               valid_input=rolling_valid_data,
                               valid_label=rolling_valid_data,
                               test_input=rolling_abnormal_data,
                               test_label=rolling_abnormal_label,
                               abnormal_data=abnormal_data,
                               abnormal_label=abnormal_label,
                               original_x_dim=original_x_dim)
    else:
        cae_output = model.fit(train_input=rolling_abnormal_data,
                               train_label=rolling_abnormal_data,
                               valid_input=rolling_valid_data,
                               valid_label=rolling_valid_data,
                               test_input=rolling_abnormal_data,
                               test_label=rolling_abnormal_label,
                               abnormal_data=abnormal_data,
                               abnormal_label=abnormal_label,
                               original_x_dim=original_x_dim)
    # %%
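    # Undo the windowing so the reconstruction aligns point-by-point with the original series,
    # then min-max scale it before computing reconstruction errors.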
    min_max_scaler = preprocessing.MinMaxScaler()
    if config.preprocessing:
        if config.use_overlapping:
            if config.use_last_point:
                dec_mean_unroll = cae_output.dec_means.detach().cpu().numpy(
                )[:, -1]
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[config.rolling_size - 1:]
            else:
                dec_mean_unroll = unroll_window_3D(
                    np.reshape(
                        cae_output.dec_means.detach().cpu().numpy(),
                        (-1, config.rolling_size, original_x_dim)))[::-1]
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]

        else:
            dec_mean_unroll = np.reshape(
                cae_output.dec_means.detach().cpu().numpy(),
                (-1, original_x_dim))
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
    else:
        dec_mean_unroll = cae_output.dec_means.detach().cpu().numpy()
        dec_mean_unroll = np.transpose(np.squeeze(dec_mean_unroll, axis=0))
        dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
        x_original_unroll = abnormal_data

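    # Optionally persist the unrolled reconstruction as a .npy artifact for later inspection.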
    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save(
            './outputs/NPY/{}/Dec_CAE_hdim_{}_rollingsize_{}_{}_pid={}.npy'.
            format(config.dataset, config.h_dim, config.rolling_size,
                   Path(train_filename).stem, config.pid), dec_mean_unroll)

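    # Per-timestep reconstruction error, then three automatic thresholds (SD / MAD / IQR)
    # turn the error signal into binary anomaly decisions.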
    error = np.sum(x_original_unroll -
                   np.reshape(dec_mean_unroll, [-1, original_x_dim]),
                   axis=1)**2
    # final_zscore = zscore(error)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    # np_decision = create_label_based_on_quantile(error, quantile=99)
    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error,
                                       Tmax=SD_Tmax,
                                       use_max=True,
                                       use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error,
                                        Tmax=MAD_Tmax,
                                        use_max=True,
                                        use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error,
                                        Tmax=IQR_Tmax,
                                        use_max=True,
                                        use_min=False)
    np_decision = {}
    np_decision["SD"] = SD_y_hat
    np_decision["MAD"] = MAD_y_hat
    np_decision["IQR"] = IQR_y_hat

    # TODO metrics computation.

    # %%
    if config.save_figure:
        file_logger.info('save_figure has been dropped.')

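    # The use_spot branch is a no-op in this example; otherwise the SD / MAD / IQR decisions are
    # scored against the ground-truth labels (anomalies are labelled -1) and ROC / PR AUCs are
    # computed from the raw reconstruction errors.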
    if not config.use_spot:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label,
                                  y_pred=np_decision[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            recall[threshold_method] = recall_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)
            f1[threshold_method] = f1_score(
                y_true=abnormal_label,
                y_pred=np_decision[threshold_method],
                pos_label=pos_label)

        fpr, tpr, _ = roc_curve(y_true=abnormal_label,
                                y_score=np.nan_to_num(error),
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label,
                                            probas_pred=np.nan_to_num(error),
                                            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(
            TN=TN,
            FP=FP,
            FN=FN,
            TP=TP,
            precision=precision,
            recall=recall,
            fbeta=f1,
            pr_auc=pr_auc,
            roc_auc=roc_auc,
            best_TN=cae_output.best_TN,
            best_FP=cae_output.best_FP,
            best_FN=cae_output.best_FN,
            best_TP=cae_output.best_TP,
            best_precision=cae_output.best_precision,
            best_recall=cae_output.best_recall,
            best_fbeta=cae_output.best_fbeta,
            best_pr_auc=cae_output.best_pr_auc,
            best_roc_auc=cae_output.best_roc_auc,
            best_cks=cae_output.best_cks,
            min_valid_loss=cae_output.min_valid_loss)
        return metrics_result
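
A minimal driver sketch for the example above. It assumes the module-level objects from the listing (the CAE class, device, file_logger, train_logger) are importable, and it fakes the configuration with a SimpleNamespace; the attribute values, file paths, and the printed fields are hypothetical placeholders, and the real config presumably carries additional CAE hyperparameters consumed inside model.fit (epochs, learning rate, and so on).

# Hypothetical usage sketch -- all paths and config values below are placeholders.
from types import SimpleNamespace

config = SimpleNamespace(
    dataset="NAB_noise",       # a "noise" substring enables negative sampling in read_dataset
    rolling_size=32,
    preprocessing=True,
    use_overlapping=True,
    use_last_point=False,
    robustness=False,
    save_output=False,
    save_figure=False,
    use_spot=False,
    h_dim=64,
    pid=0,
)

metrics = RunModel(
    train_filename="./data/train/series_01.csv",
    test_filename="./data/test/series_01.csv",
    label_filename="./data/label/series_01.csv",
    config=config,
    ratio=0.05,
)
if metrics is not None:  # RunModel falls through (returns None) when use_spot is set
    print(metrics.pr_auc, metrics.roc_auc)  # per-threshold dicts live in metrics.fbeta, metrics.precision, ...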