def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = True if "noise" in config.dataset else False
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename, test_filename, label_filename, normalize=True,
        file_logger=file_logger, negative_sample=negative_sample, ratio=ratio)

    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning("test data is less than rolling_size! Ignore the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN, FP=FP, FN=FN, TP=TP, precision=precision,
                                       recall=recall, fbeta=f1, pr_auc=pr_auc, roc_auc=roc_auc)
        return metrics_result

    original_x_dim = abnormal_data.shape[1]

    # Build (overlapping or non-overlapping) windows of the train/test series.
    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        window_fn = rolling_window_2D if config.use_overlapping else cutting_window_2D
        if train_data is not None:
            rolling_train_data = window_fn(train_data, config.rolling_size)
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = (rolling_train_data[:train_split_idx],
                                                      rolling_train_data[train_split_idx:])
        else:
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
    else:
        if train_data is not None:
            rolling_train_data = np.expand_dims(train_data, axis=0)
            rolling_abnormal_data = np.expand_dims(abnormal_data, axis=0)
            rolling_abnormal_label = np.expand_dims(abnormal_label, axis=0)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = (rolling_train_data[:train_split_idx],
                                                      rolling_train_data[train_split_idx:])
        else:
            rolling_abnormal_data = np.expand_dims(abnormal_data, axis=0)
            rolling_abnormal_label = np.expand_dims(abnormal_label, axis=0)
    config.x_dim = rolling_abnormal_data.shape[2]

    model = DONUT(file_name=train_filename, config=config)
    model = model.to(device)
    donut_output = None
    if train_data is not None and config.robustness == False:
        donut_output = model.fit(train_input=rolling_train_data, train_label=rolling_train_data,
                                 valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                                 test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                                 abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                                 original_x_dim=original_x_dim)
    elif train_data is None or config.robustness == True:
        donut_output = model.fit(train_input=rolling_abnormal_data, train_label=rolling_abnormal_data,
                                 valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                                 test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                                 abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                                 original_x_dim=original_x_dim)

    # %%
    # Undo the windowing so the reconstruction is aligned with the original series.
    min_max_scaler = preprocessing.MinMaxScaler()
    if config.preprocessing:
        if config.use_overlapping:
            if config.use_last_point:
                dec_mean_unroll = np.reshape(donut_output.dec_means.detach().cpu().numpy(),
                                             (-1, config.rolling_size, original_x_dim))[:, -1]
                latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[config.rolling_size - 1:]
            else:
                dec_mean_unroll = unroll_window_3D(
                    np.reshape(donut_output.dec_means.detach().cpu().numpy(),
                               (-1, config.rolling_size, original_x_dim)))[::-1]
                latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
        else:
            dec_mean_unroll = np.reshape(donut_output.dec_means.detach().cpu().numpy(),
                                         (-1, original_x_dim))
            latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
    else:
        dec_mean_unroll = donut_output.dec_means.detach().cpu().numpy()
        latent_mean_unroll = donut_output.zs.detach().cpu().numpy()
        dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
        x_original_unroll = abnormal_data

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save('./outputs/NPY/{}/Dec_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.h_dim, config.rolling_size, train_filename.stem, config.pid),
            dec_mean_unroll)
        np.save('./outputs/NPY/{}/Latent_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.h_dim, config.rolling_size, train_filename.stem, config.pid),
            latent_mean_unroll)

    # Point-wise reconstruction error used as the anomaly score.
    error = np.sum(x_original_unroll - np.reshape(dec_mean_unroll, [-1, original_x_dim]), axis=1) ** 2
    # final_zscore = zscore(error)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    # np_decision = create_label_based_on_quantile(error, quantile=99)
    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error, Tmax=SD_Tmax, use_max=True, use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error, Tmax=MAD_Tmax, use_max=True, use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error, Tmax=IQR_Tmax, use_max=True, use_min=False)
    np_decision = {"SD": SD_y_hat, "MAD": MAD_y_hat, "IQR": IQR_y_hat}

    # %%
    if config.save_figure:
        if not os.path.exists('./figures/{}/'.format(config.dataset)):
            os.makedirs('./figures/{}/'.format(config.dataset))
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(x_original_unroll, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./figures/{}/Ori_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.h_dim, config.rolling_size, train_filename.stem, config.pid),
                dpi=600)
            plt.close()

            # Plot decoder output
            plt.figure(figsize=(9, 3))
            plt.plot(dec_mean_unroll, color='blue', lw=1.5)
            plt.title('Decoding Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./figures/{}/Dec_DONUT_hdim_{}_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.h_dim, config.rolling_size, train_filename.stem, config.pid),
                dpi=600)
            plt.close()

            # Mark the detected anomalies (red) on the test series, one figure per auto-threshold.
            t = np.arange(0, abnormal_data.shape[0])
            for threshold_method in ("SD", "MAD", "IQR"):
                markercolors = ['blue' for i in range(config.rolling_size - 1)] + \
                               ['blue' if i == 1 else 'red' for i in np_decision[threshold_method]]
                markersize = [4 for i in range(config.rolling_size - 1)] + \
                             [4 if i == 1 else 25 for i in np_decision[threshold_method]]
                plt.figure(figsize=(9, 3))
                ax = plt.axes()
                plt.yticks([0, 0.25, 0.5, 0.75, 1])
                ax.set_xlim(t[0] - 10, t[-1] + 10)
                ax.set_ylim(-0.10, 1.10)
                plt.xlabel('$t$')
                plt.ylabel('$s$')
                plt.grid(True)
                plt.tight_layout()
                plt.margins(0.1)
                plt.plot(abnormal_data, alpha=0.7)
                plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
                # plt.show()
                plt.savefig('./figures/{}/VisOut_DONUT_hdim_{}_rollingsize_{}_{}_{}_pid={}.png'.format(
                    config.dataset, config.h_dim, config.rolling_size, threshold_method,
                    train_filename.stem, config.pid), dpi=300)
                plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label, y_pred=np_decision[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(y_true=abnormal_label,
                                                          y_pred=np_decision[threshold_method],
                                                          pos_label=pos_label)
            recall[threshold_method] = recall_score(y_true=abnormal_label,
                                                    y_pred=np_decision[threshold_method],
                                                    pos_label=pos_label)
            f1[threshold_method] = f1_score(y_true=abnormal_label,
                                            y_pred=np_decision[threshold_method],
                                            pos_label=pos_label)
        fpr, tpr, _ = roc_curve(y_true=abnormal_label, y_score=np.nan_to_num(error),
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label,
                                            probas_pred=np.nan_to_num(error),
                                            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(
            TN=TN, FP=FP, FN=FN, TP=TP, precision=precision, recall=recall, fbeta=f1,
            pr_auc=pr_auc, roc_auc=roc_auc,
            best_TN=donut_output.best_TN, best_FP=donut_output.best_FP,
            best_FN=donut_output.best_FN, best_TP=donut_output.best_TP,
            best_precision=donut_output.best_precision, best_recall=donut_output.best_recall,
            best_fbeta=donut_output.best_fbeta, best_pr_auc=donut_output.best_pr_auc,
            best_roc_auc=donut_output.best_roc_auc, best_cks=donut_output.best_cks)
        return metrics_result
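

# SD_autothreshold, MAD_autothreshold, IQR_autothreshold and get_labels_by_threshold are
# imported from this repository's shared utilities. The sketches below (names suffixed
# with _sketch are hypothetical, not part of the codebase) only illustrate the semantics
# that the RunModel functions in this module assume: each helper returns a (Tmin, Tmax)
# pair for a 1-D score array, and scores above Tmax are labelled -1 (anomaly) while the
# rest are labelled 1 (normal), matching pos_label=-1 in the metrics. The cut-off
# constants (3 sigma, 3 scaled MADs, 1.5 IQR) are assumptions for illustration only.
def _sd_autothreshold_sketch(score):
    mu, sigma = np.mean(score), np.std(score)
    return mu - 3 * sigma, mu + 3 * sigma


def _mad_autothreshold_sketch(score):
    med = np.median(score)
    mad = 1.4826 * np.median(np.abs(score - med))  # scaled MAD, comparable to a std under normality
    return med - 3 * mad, med + 3 * mad


def _iqr_autothreshold_sketch(score):
    q1, q3 = np.percentile(score, 25), np.percentile(score, 75)
    iqr = q3 - q1
    return q1 - 1.5 * iqr, q3 + 1.5 * iqr


def _get_labels_by_threshold_sketch(score, Tmax, use_max=True, use_min=False, Tmin=None):
    labels = np.ones_like(score, dtype=int)  # 1 = normal
    if use_max:
        labels[score > Tmax] = -1             # -1 = anomaly
    if use_min and Tmin is not None:
        labels[score < Tmin] = -1
    return labels
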
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = True if "noise" in config.dataset else False
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename, test_filename, label_filename, normalize=True,
        file_logger=file_logger, negative_sample=negative_sample, ratio=ratio)
    original_x_dim = abnormal_data.shape[1]
    config.x_dim = abnormal_data.shape[1]

    model = LOF(train_filename, config)
    lof_output = model.fit(train_input=abnormal_data, train_label=abnormal_label,
                           test_input=abnormal_data, test_label=abnormal_label)

    SD_Tmin, SD_Tmax = SD_autothreshold(-lof_output.negative_factor)
    SD_y_hat = get_labels_by_threshold(-lof_output.negative_factor, Tmax=SD_Tmax,
                                       use_max=True, use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(-lof_output.negative_factor)
    MAD_y_hat = get_labels_by_threshold(-lof_output.negative_factor, Tmax=MAD_Tmax,
                                        use_max=True, use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(-lof_output.negative_factor)
    IQR_y_hat = get_labels_by_threshold(-lof_output.negative_factor, Tmax=IQR_Tmax,
                                        use_max=True, use_min=False)
    lof_output.y_hat = {"SD": SD_y_hat, "MAD": MAD_y_hat, "IQR": IQR_y_hat}

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save('./outputs/NPY/{}/Score_LOF_hdim_{}_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.n_neighbors, 1, train_filename.stem, config.pid),
            lof_output.negative_factor)
        np.save('./outputs/NPY/{}/Pred_LOF_hdim_{}_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.n_neighbors, 1, train_filename.stem, config.pid),
            lof_output.y_hat)

    # %%
    if config.save_figure:
        if not os.path.exists('./figures/{}/'.format(config.dataset)):
            os.makedirs('./figures/{}/'.format(config.dataset))
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(abnormal_data, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./figures/{}/Ori_LOF_hdim_{}_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.n_neighbors, 1, train_filename.stem, config.pid), dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            markercolors = ['blue' if i == 1 else 'red' for i in abnormal_label]
            markersize = [4 if i == 1 else 25 for i in abnormal_label]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data, alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig('./figures/{}/VisInp_LOF_hdim_{}_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.n_neighbors, 1, train_filename.stem, config.pid), dpi=300)
            plt.close()

            for threshold_method in ("SD", "MAD", "IQR"):
                markercolors = ['blue' if i == 1 else 'red'
                                for i in lof_output.y_hat[threshold_method]]
                markersize = [4 if i == 1 else 25 for i in lof_output.y_hat[threshold_method]]
                plt.figure(figsize=(9, 3))
                ax = plt.axes()
                plt.yticks([0, 0.25, 0.5, 0.75, 1])
                ax.set_xlim(t[0] - 10, t[-1] + 10)
                ax.set_ylim(-0.10, 1.10)
                plt.xlabel('$t$')
                plt.ylabel('$s$')
                plt.grid(True)
                plt.tight_layout()
                plt.margins(0.1)
                plt.plot(abnormal_data, alpha=0.7)
                plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
                # plt.show()
                plt.savefig('./figures/{}/VisOut_LOF_hdim_{}_rollingsize_{}_{}_{}_pid={}.png'.format(
                    config.dataset, config.n_neighbors, 1, threshold_method,
                    train_filename.stem, config.pid), dpi=300)
                plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in lof_output.y_hat:
            cm = confusion_matrix(y_true=abnormal_label, y_pred=lof_output.y_hat[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(y_true=abnormal_label,
                                                          y_pred=lof_output.y_hat[threshold_method],
                                                          pos_label=pos_label)
            recall[threshold_method] = recall_score(y_true=abnormal_label,
                                                    y_pred=lof_output.y_hat[threshold_method],
                                                    pos_label=pos_label)
            f1[threshold_method] = f1_score(y_true=abnormal_label,
                                            y_pred=lof_output.y_hat[threshold_method],
                                            pos_label=pos_label)
        fpr, tpr, _ = roc_curve(y_true=abnormal_label, y_score=-lof_output.negative_factor,
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label,
                                            probas_pred=-lof_output.negative_factor,
                                            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(TN=TN, FP=FP, FN=FN, TP=TP, precision=precision,
                                       recall=recall, fbeta=f1, pr_auc=pr_auc, roc_auc=roc_auc)
        return metrics_result
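

# The LOF wrapper above is assumed to expose scikit-learn's negative outlier factor as
# lof_output.negative_factor: it is close to -1 for inliers and far below -1 for
# outliers, which is why the score is negated before thresholding (larger = more
# anomalous). The sketch below (a hypothetical stand-in, not the repository's wrapper)
# shows that scoring step with scikit-learn directly.
def _lof_score_sketch(x, n_neighbors=20):
    # n_neighbors mirrors config.n_neighbors used above.
    from sklearn.neighbors import LocalOutlierFactor

    lof = LocalOutlierFactor(n_neighbors=n_neighbors)
    lof.fit(x)
    # negative_outlier_factor_ is defined after fit(); negate it to get an anomaly score.
    return -lof.negative_outlier_factor_
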
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = True if "noise" in config.dataset else False
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename, test_filename, label_filename, normalize=True,
        file_logger=file_logger, negative_sample=negative_sample, ratio=ratio)
    original_x_dim = abnormal_data.shape[1]
    config.x_dim = abnormal_data.shape[1]

    # Compute one matrix profile per dimension and sum them; each profile has
    # length T - pattern_size + 1, i.e. one value per subsequence.
    Pab = []
    for i in range(abnormal_data.shape[1]):
        ts = abnormal_data[:, i]
        Pab_i, _ = stomp(ts, config.pattern_size)
        Pab.append(np.nan_to_num(Pab_i))
    Pab = np.sum(Pab, axis=0)

    # final_zscore = zscore(Pab)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    # np_decision = create_label_based_on_quantile(-Pab, quantile=99)
    # higher -Pab is more likely to be anomalies.
    SD_Tmin, SD_Tmax = SD_autothreshold(-Pab)
    SD_y_hat = get_labels_by_threshold(-Pab, Tmax=SD_Tmax, use_max=True, use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(-Pab)
    MAD_y_hat = get_labels_by_threshold(-Pab, Tmax=MAD_Tmax, use_max=True, use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(-Pab)
    IQR_y_hat = get_labels_by_threshold(-Pab, Tmax=IQR_Tmax, use_max=True, use_min=False)
    np_decision = {"SD": SD_y_hat, "MAD": MAD_y_hat, "IQR": IQR_y_hat}

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save('./outputs/NPY/{}/MP_hdim_None_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.pattern_size, train_filename.stem, config.pid), Pab)

    # %%
    if config.save_figure:
        if not os.path.exists('./figures/{}/'.format(config.dataset)):
            os.makedirs('./figures/{}/'.format(config.dataset))
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(ts, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./figures/{}/Ori_MP_hdim_None_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.pattern_size, train_filename.stem, config.pid), dpi=300)
            plt.close()

            # Plot the matrix profile itself.
            plt.figure(figsize=(9, 3))
            plt.plot(Pab, color='blue', lw=1.5)
            plt.title('Profile Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./figures/{}/Profile_MP_hdim_None_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.pattern_size, train_filename.stem, config.pid), dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])

            # Ground-truth labels; the first pattern_size - 1 points have no profile value.
            markercolors = ['blue' for i in range(config.pattern_size - 1)] + \
                           ['blue' if i == 1 else 'red'
                            for i in abnormal_label[config.pattern_size - 1:]]
            markersize = [4 for i in range(config.pattern_size - 1)] + \
                         [4 if i == 1 else 25
                          for i in abnormal_label[config.pattern_size - 1:]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(np.squeeze(abnormal_data), alpha=0.7)
            plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
            # plt.show()
            plt.savefig('./figures/{}/VisInp_MP_hdim_None_rollingsize_{}_{}_pid={}.png'.format(
                config.dataset, config.pattern_size, train_filename.stem, config.pid), dpi=600)
            plt.close()

            # Predicted labels for each auto-threshold, padded at the front so that
            # decision i lines up with the point at t = i + pattern_size - 1
            # (the same alignment used for the metrics below).
            for threshold_method in ("SD", "MAD", "IQR"):
                markercolors = ['blue' for i in range(config.pattern_size - 1)] + \
                               ['blue' if i == 1 else 'red' for i in np_decision[threshold_method]]
                markersize = [4 for i in range(config.pattern_size - 1)] + \
                             [4 if i == 1 else 25 for i in np_decision[threshold_method]]
                plt.figure(figsize=(9, 3))
                ax = plt.axes()
                plt.yticks([0, 0.25, 0.5, 0.75, 1])
                ax.set_xlim(t[0] - 10, t[-1] + 10)
                ax.set_ylim(-0.10, 1.10)
                plt.xlabel('$t$')
                plt.ylabel('$s$')
                plt.grid(True)
                plt.tight_layout()
                plt.margins(0.1)
                plt.plot(np.squeeze(abnormal_data), alpha=0.7)
                plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
                # plt.show()
                plt.savefig('./figures/{}/VisOut_MP_hdim_None_rollingsize_{}_{}_{}_pid={}.png'.format(
                    config.dataset, config.pattern_size, threshold_method,
                    train_filename.stem, config.pid), dpi=300)
                plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label[config.pattern_size - 1:],
                                  y_pred=np_decision[threshold_method], labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method], pos_label=pos_label)
            recall[threshold_method] = recall_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method], pos_label=pos_label)
            f1[threshold_method] = f1_score(
                y_true=abnormal_label[config.pattern_size - 1:],
                y_pred=np_decision[threshold_method], pos_label=pos_label)
        fpr, tpr, _ = roc_curve(y_true=abnormal_label[config.pattern_size - 1:], y_score=-Pab,
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label[config.pattern_size - 1:],
                                            probas_pred=-Pab, pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(TN=TN, FP=FP, FN=FN, TP=TP, precision=precision,
                                       recall=recall, fbeta=f1, pr_auc=pr_auc, roc_auc=roc_auc)
        return metrics_result
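

# stomp(ts, m) above returns the matrix profile of ts: for every length-m subsequence,
# the z-normalized Euclidean distance to its nearest non-trivial match elsewhere in the
# series, hence the profile has length len(ts) - m + 1 and the pattern_size - 1 offset
# used when aligning it with the labels. The brute-force sketch below (a hypothetical
# helper, much slower than STOMP) only illustrates what that quantity is.
def _matrix_profile_sketch(ts, m):
    ts = np.asarray(ts, dtype=float)
    n = len(ts)
    windows = np.stack([ts[i:i + m] for i in range(n - m + 1)], axis=0)
    # z-normalize each subsequence, as the matrix profile is defined on z-normalized distances
    mu = windows.mean(axis=1, keepdims=True)
    sigma = windows.std(axis=1, keepdims=True) + 1e-8
    znorm = (windows - mu) / sigma
    profile = np.empty(n - m + 1)
    for i in range(n - m + 1):
        dists = np.linalg.norm(znorm - znorm[i], axis=1)
        # exclude the trivial match zone around i so a window does not match itself
        lo, hi = max(0, i - m // 2), min(n - m + 1, i + m // 2 + 1)
        dists[lo:hi] = np.inf
        profile[i] = dists.min()
    return profile
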
def RunModel(train_filename, test_filename, label_filename, config, ratio, gpu_id):
    negative_sample = True if "noise" in config.dataset else False
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename, test_filename, label_filename, normalize=True,
        file_logger=file_logger, negative_sample=negative_sample, ratio=ratio)
    original_x_dim = abnormal_data.shape[1]

    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning("test data is less than rolling_size! Ignore the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN, FP=FP, FN=FN, TP=TP, precision=precision,
                                       recall=recall, fbeta=f1, pr_auc=pr_auc, roc_auc=roc_auc)
        return metrics_result

    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        window_fn = rolling_window_2D if config.use_overlapping else cutting_window_2D
        if train_data is not None:
            rolling_train_data = window_fn(train_data, config.rolling_size)
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = (rolling_train_data[:train_split_idx],
                                                      rolling_train_data[train_split_idx:])
            # Flatten each window so the model sees vectors of size rolling_size * x_dim.
            rolling_train_data = np.reshape(rolling_train_data,
                                            [rolling_train_data.shape[0],
                                             rolling_train_data.shape[1] * rolling_train_data.shape[2]])
            rolling_valid_data = np.reshape(rolling_valid_data,
                                            [rolling_valid_data.shape[0],
                                             rolling_valid_data.shape[1] * rolling_valid_data.shape[2]])
            rolling_abnormal_data = np.reshape(rolling_abnormal_data,
                                               [rolling_abnormal_data.shape[0],
                                                rolling_abnormal_data.shape[1] * rolling_abnormal_data.shape[2]])
            rolling_abnormal_label = np.reshape(rolling_abnormal_label,
                                                [rolling_abnormal_label.shape[0],
                                                 rolling_abnormal_label.shape[1] * rolling_abnormal_label.shape[2]])
        else:
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
            rolling_abnormal_data = np.reshape(rolling_abnormal_data,
                                               [rolling_abnormal_data.shape[0],
                                                rolling_abnormal_data.shape[1] * rolling_abnormal_data.shape[2]])
            rolling_abnormal_label = np.reshape(rolling_abnormal_label,
                                                [rolling_abnormal_label.shape[0],
                                                 rolling_abnormal_label.shape[1] * rolling_abnormal_label.shape[2]])
    else:
        if train_data is not None:
            rolling_train_data, rolling_abnormal_data, rolling_abnormal_label = \
                train_data, abnormal_data, abnormal_label
        else:
            rolling_abnormal_data, rolling_abnormal_label = abnormal_data, abnormal_label
    config.x_dim = rolling_abnormal_data.shape[1]

    ensemble_error = []
    ensemble_output = []
    # Kept only to show the per-component results.
    ensemble_TN = []
    ensemble_TP = []
    ensemble_FN = []
    ensemble_FP = []
    ensemble_PRECISION = []
    ensemble_RECALL = []
    ensemble_FBETA = []
    ensemble_PR_AUC = []
    ensemble_ROC_AUC = []
    ensemble_CKS = []
    for i in range(config.ensemble_space):
        train_logger.info('component #{}'.format(i))
        model = RN(file_name=train_filename, config=config, gpu_id=gpu_id)
        model = model.to(device)
        if train_data is not None and config.robustness == False:
            rn_output = model.fit(train_input=rolling_train_data, train_label=rolling_train_data,
                                  valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                                  test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                                  abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                                  original_x_dim=original_x_dim)
        elif train_data is None or config.robustness == True:
            rn_output = model.fit(train_input=rolling_abnormal_data, train_label=rolling_abnormal_data,
                                  valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                                  test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                                  abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                                  original_x_dim=original_x_dim)

        # %%
        min_max_scaler = preprocessing.MinMaxScaler()
        if config.preprocessing:
            if config.use_overlapping:
                if config.use_last_point:
                    dec_mean_unroll = np.reshape(rn_output.dec_means.detach().cpu().numpy(),
                                                 (-1, config.rolling_size, original_x_dim))[:, -1]
                    dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                    x_original_unroll = abnormal_data[config.rolling_size - 1:]
                else:
                    dec_mean_unroll = unroll_window_3D(
                        np.reshape(rn_output.dec_means.detach().cpu().numpy(),
                                   (-1, config.rolling_size, original_x_dim)))[::-1]
                    dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                    x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
            else:
                dec_mean_unroll = np.reshape(rn_output.dec_means.detach().cpu().numpy(),
                                             (-1, original_x_dim))
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
        else:
            dec_mean_unroll = rn_output.dec_means.detach().cpu().numpy()
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data

        if config.save_output:
            if not os.path.exists('./save_outputs/NPY/{}/'.format(config.dataset)):
                os.makedirs('./save_outputs/NPY/{}/'.format(config.dataset))
            np.save('./save_outputs/NPY/{}/Dec_RN_{}_{}_pid={}.npy'.format(
                config.dataset, Path(train_filename).stem, i, config.pid), dec_mean_unroll)

        error = np.sum(abnormal_data[:dec_mean_unroll.shape[0]] -
                       np.reshape(dec_mean_unroll, [-1, original_x_dim]), axis=1) ** 2
        ensemble_error.append(error)
        ensemble_output.append(dec_mean_unroll)
        ensemble_FN.append(rn_output.best_FN)
        ensemble_TN.append(rn_output.best_TN)
        ensemble_FP.append(rn_output.best_FP)
        ensemble_TP.append(rn_output.best_TP)
        ensemble_PRECISION.append(rn_output.best_precision)
        ensemble_RECALL.append(rn_output.best_recall)
        ensemble_FBETA.append(rn_output.best_fbeta)
        ensemble_PR_AUC.append(rn_output.best_pr_auc)
        ensemble_ROC_AUC.append(rn_output.best_roc_auc)
        ensemble_CKS.append(rn_output.best_cks)

    # Aggregate the ensemble by the element-wise median of the errors and reconstructions.
    error = np.median(np.stack(ensemble_error, axis=0), axis=0)
    dec_mean_unroll = np.median(np.stack(ensemble_output, axis=0), axis=0)

    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error, Tmax=SD_Tmax, use_max=True, use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error, Tmax=MAD_Tmax, use_max=True, use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error, Tmax=IQR_Tmax, use_max=True, use_min=False)
    np_decision = {"SD": SD_y_hat, "MAD": MAD_y_hat, "IQR": IQR_y_hat}

    if config.save_output:
        np.save('./save_outputs/NPY/{}/Dec_RN_{}_pid={}.npy'.format(
            config.dataset, Path(train_filename).stem, config.pid), dec_mean_unroll)
        np.save('./save_outputs/NPY/{}/Error_RN_{}_pid={}.npy'.format(
            config.dataset, Path(train_filename).stem, config.pid), error)

    # %%
    if config.save_figure:
        if not os.path.exists('./save_figures/{}/'.format(config.dataset)):
            os.makedirs('./save_figures/{}/'.format(config.dataset))
        if original_x_dim == 1:
            plt.figure(figsize=(9, 3))
            plt.plot(x_original_unroll, color='blue', lw=1.5)
            plt.title('Original Data')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./save_figures/{}/Ori_RN_{}_pid={}.png'.format(
                config.dataset, Path(train_filename).stem, config.pid), dpi=300)
            plt.close()

            # Plot decoder output
            plt.figure(figsize=(9, 3))
            plt.plot(dec_mean_unroll, color='blue', lw=1.5)
            plt.title('Decoding Output')
            plt.grid(True)
            plt.tight_layout()
            # plt.show()
            plt.savefig('./save_figures/{}/Dec_RN_{}_pid={}.png'.format(
                config.dataset, Path(train_filename).stem, config.pid), dpi=300)
            plt.close()

            t = np.arange(0, abnormal_data.shape[0])
            markercolors = ['blue' if i == 1 else 'red'
                            for i in abnormal_label[:dec_mean_unroll.shape[0]]]
            markersize = [4 if i == 1 else 25
                          for i in abnormal_label[:dec_mean_unroll.shape[0]]]
            plt.figure(figsize=(9, 3))
            ax = plt.axes()
            plt.yticks([0, 0.25, 0.5, 0.75, 1])
            ax.set_xlim(t[0] - 10, t[-1] + 10)
            ax.set_ylim(-0.10, 1.10)
            plt.xlabel('$t$')
            plt.ylabel('$s$')
            plt.grid(True)
            plt.tight_layout()
            plt.margins(0.1)
            plt.plot(abnormal_data[:dec_mean_unroll.shape[0]], alpha=0.7)
            plt.scatter(t[:dec_mean_unroll.shape[0]],
                        x_original_unroll[:dec_mean_unroll.shape[0]],
                        s=markersize, c=markercolors)
            # plt.show()
            plt.savefig('./save_figures/{}/VisInp_RN_{}_pid={}.png'.format(
                config.dataset, Path(train_filename).stem, config.pid), dpi=300)
            plt.close()

            for threshold_method in ("SD", "MAD", "IQR"):
                markercolors = ['blue' for i in range(config.rolling_size - 1)] + \
                               ['blue' if i == 1 else 'red' for i in np_decision[threshold_method]]
                markersize = [4 for i in range(config.rolling_size - 1)] + \
                             [4 if i == 1 else 25 for i in np_decision[threshold_method]]
                plt.figure(figsize=(9, 3))
                ax = plt.axes()
                plt.yticks([0, 0.25, 0.5, 0.75, 1])
                ax.set_xlim(t[0] - 10, t[-1] + 10)
                ax.set_ylim(-0.10, 1.10)
                plt.xlabel('$t$')
                plt.ylabel('$s$')
                plt.grid(True)
                plt.tight_layout()
                plt.margins(0.1)
                plt.plot(abnormal_data, alpha=0.7)
                plt.scatter(t, abnormal_data, s=markersize, c=markercolors)
                # plt.show()
                plt.savefig('./save_figures/{}/VisOut_RN_{}_{}_pid={}.png'.format(
                    config.dataset, Path(train_filename).stem, threshold_method, config.pid),
                    dpi=300)
                plt.close()
        else:
            file_logger.info('cannot plot image with x_dim > 1')

    if config.use_spot:
        pass
    else:
        try:
            pos_label = -1
            TN, FP, FN, TP, precision, recall, f1, cks = {}, {}, {}, {}, {}, {}, {}, {}
            for threshold_method in np_decision:
                cm = confusion_matrix(y_true=abnormal_label[config.rolling_size - 1:],
                                      y_pred=np_decision[threshold_method], labels=[1, -1])
                TN[threshold_method] = cm[0][0]
                FP[threshold_method] = cm[0][1]
                FN[threshold_method] = cm[1][0]
                TP[threshold_method] = cm[1][1]
                precision[threshold_method] = precision_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method], pos_label=pos_label)
                recall[threshold_method] = recall_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method], pos_label=pos_label)
                f1[threshold_method] = f1_score(
                    y_true=abnormal_label[config.rolling_size - 1:],
                    y_pred=np_decision[threshold_method], pos_label=pos_label)
                cks[threshold_method] = cohen_kappa_score(
                    y1=abnormal_label[config.rolling_size - 1:],
                    y2=np_decision[threshold_method])
            fpr, tpr, _ = roc_curve(y_true=abnormal_label[config.rolling_size - 1:],
                                    y_score=np.nan_to_num(error), pos_label=pos_label)
            roc_auc = auc(fpr, tpr)
            pre, re, _ = precision_recall_curve(y_true=abnormal_label[config.rolling_size - 1:],
                                                probas_pred=np.nan_to_num(error),
                                                pos_label=pos_label)
            pr_auc = auc(re, pre)
            metrics_result = MetricsResult(
                TN=TN, FP=FP, FN=FN, TP=TP, precision=precision, recall=recall, fbeta=f1,
                pr_auc=pr_auc, roc_auc=roc_auc,
                best_TN=rn_output.best_TN, best_FP=rn_output.best_FP,
                best_FN=rn_output.best_FN, best_TP=rn_output.best_TP,
                best_precision=rn_output.best_precision, best_recall=rn_output.best_recall,
                best_fbeta=rn_output.best_fbeta, best_pr_auc=rn_output.best_pr_auc,
                best_roc_auc=rn_output.best_roc_auc, best_cks=rn_output.best_cks,
                min_valid_loss=rn_output.min_valid_loss,
                testing_time=rn_output.testing_time, training_time=rn_output.training_time,
                memory_usage_nvidia=rn_output.memory_usage_nvidia)
            return metrics_result
        except Exception:
            pass
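

# rolling_window_2D and cutting_window_2D are imported from the repository utilities.
# The sketches below (hypothetical helpers, names suffixed with _sketch) only illustrate
# the window shapes the preprocessing above assumes: overlapping windows produce
# T - window_size + 1 windows of shape (window_size, d), while non-overlapping cutting
# produces T // window_size disjoint chunks of the same shape.
def _rolling_window_2d_sketch(x, window_size):
    # (T, d) -> (T - window_size + 1, window_size, d), stride 1 along time.
    return np.stack([x[i:i + window_size] for i in range(x.shape[0] - window_size + 1)], axis=0)


def _cutting_window_2d_sketch(x, window_size):
    # (T, d) -> (T // window_size, window_size, d), dropping the incomplete tail chunk.
    n_chunks = x.shape[0] // window_size
    return np.reshape(x[:n_chunks * window_size], (n_chunks, window_size, x.shape[1]))
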
def RunModel(train_filename, test_filename, label_filename, config, ratio):
    negative_sample = True if "noise" in config.dataset else False
    train_data, abnormal_data, abnormal_label = read_dataset(
        train_filename, test_filename, label_filename, normalize=True,
        file_logger=file_logger, negative_sample=negative_sample, ratio=ratio)

    if abnormal_data.shape[0] < config.rolling_size:
        train_logger.warning("test data is less than rolling_size! Ignore the current data!")
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in ["SD", "MAD", "IQR"]:
            TN[threshold_method] = -1
            FP[threshold_method] = -1
            FN[threshold_method] = -1
            TP[threshold_method] = -1
            precision[threshold_method] = -1
            recall[threshold_method] = -1
            f1[threshold_method] = -1
        roc_auc = -1
        pr_auc = -1
        metrics_result = MetricsResult(TN=TN, FP=FP, FN=FN, TP=TP, precision=precision,
                                       recall=recall, fbeta=f1, pr_auc=pr_auc, roc_auc=roc_auc)
        return metrics_result

    original_x_dim = abnormal_data.shape[1]

    rolling_train_data = None
    rolling_valid_data = None
    if config.preprocessing:
        window_fn = rolling_window_2D if config.use_overlapping else cutting_window_2D
        if train_data is not None:
            rolling_train_data = window_fn(train_data, config.rolling_size)
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = (rolling_train_data[:train_split_idx],
                                                      rolling_train_data[train_split_idx:])
        else:
            rolling_abnormal_data = window_fn(abnormal_data, config.rolling_size)
            rolling_abnormal_label = window_fn(abnormal_label, config.rolling_size)
    else:
        if train_data is not None:
            rolling_train_data = np.expand_dims(train_data, axis=0)
            rolling_abnormal_data = np.expand_dims(abnormal_data, axis=0)
            rolling_abnormal_label = np.expand_dims(abnormal_label, axis=0)
            train_split_idx = int(rolling_train_data.shape[0] * 0.7)
            rolling_train_data, rolling_valid_data = (rolling_train_data[:train_split_idx],
                                                      rolling_train_data[train_split_idx:])
        else:
            rolling_abnormal_data = np.expand_dims(abnormal_data, axis=0)
            rolling_abnormal_label = np.expand_dims(abnormal_label, axis=0)
    config.x_dim = rolling_abnormal_data.shape[1]

    model = CAE(file_name=train_filename, config=config)
    model = model.to(device)
    cae_output = None
    if train_data is not None and config.robustness == False:
        cae_output = model.fit(train_input=rolling_train_data, train_label=rolling_train_data,
                               valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                               test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                               abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                               original_x_dim=original_x_dim)
    elif train_data is None or config.robustness == True:
        cae_output = model.fit(train_input=rolling_abnormal_data, train_label=rolling_abnormal_data,
                               valid_input=rolling_valid_data, valid_label=rolling_valid_data,
                               test_input=rolling_abnormal_data, test_label=rolling_abnormal_label,
                               abnormal_data=abnormal_data, abnormal_label=abnormal_label,
                               original_x_dim=original_x_dim)

    # %%
    min_max_scaler = preprocessing.MinMaxScaler()
    if config.preprocessing:
        if config.use_overlapping:
            if config.use_last_point:
                dec_mean_unroll = cae_output.dec_means.detach().cpu().numpy()[:, -1]
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[config.rolling_size - 1:]
            else:
                dec_mean_unroll = unroll_window_3D(
                    np.reshape(cae_output.dec_means.detach().cpu().numpy(),
                               (-1, config.rolling_size, original_x_dim)))[::-1]
                dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
                x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
        else:
            dec_mean_unroll = np.reshape(cae_output.dec_means.detach().cpu().numpy(),
                                         (-1, original_x_dim))
            dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
            x_original_unroll = abnormal_data[:dec_mean_unroll.shape[0]]
    else:
        dec_mean_unroll = cae_output.dec_means.detach().cpu().numpy()
        dec_mean_unroll = np.transpose(np.squeeze(dec_mean_unroll, axis=0))
        dec_mean_unroll = min_max_scaler.fit_transform(dec_mean_unroll)
        x_original_unroll = abnormal_data

    if config.save_output:
        if not os.path.exists('./outputs/NPY/{}/'.format(config.dataset)):
            os.makedirs('./outputs/NPY/{}/'.format(config.dataset))
        np.save('./outputs/NPY/{}/Dec_CAE_hdim_{}_rollingsize_{}_{}_pid={}.npy'.format(
            config.dataset, config.h_dim, config.rolling_size, Path(train_filename).stem,
            config.pid), dec_mean_unroll)

    error = np.sum(x_original_unroll - np.reshape(dec_mean_unroll, [-1, original_x_dim]), axis=1) ** 2
    # final_zscore = zscore(error)
    # np_decision = create_label_based_on_zscore(final_zscore, 2.5, True)
    # np_decision = create_label_based_on_quantile(error, quantile=99)
    SD_Tmin, SD_Tmax = SD_autothreshold(error)
    SD_y_hat = get_labels_by_threshold(error, Tmax=SD_Tmax, use_max=True, use_min=False)
    MAD_Tmin, MAD_Tmax = MAD_autothreshold(error)
    MAD_y_hat = get_labels_by_threshold(error, Tmax=MAD_Tmax, use_max=True, use_min=False)
    IQR_Tmin, IQR_Tmax = IQR_autothreshold(error)
    IQR_y_hat = get_labels_by_threshold(error, Tmax=IQR_Tmax, use_max=True, use_min=False)
    np_decision = {"SD": SD_y_hat, "MAD": MAD_y_hat, "IQR": IQR_y_hat}

    # %%
    if config.save_figure:
        file_logger.info('save_figure has been dropped.')

    if config.use_spot:
        pass
    else:
        pos_label = -1
        TN, FP, FN, TP, precision, recall, f1 = {}, {}, {}, {}, {}, {}, {}
        for threshold_method in np_decision:
            cm = confusion_matrix(y_true=abnormal_label, y_pred=np_decision[threshold_method],
                                  labels=[1, -1])
            TN[threshold_method] = cm[0][0]
            FP[threshold_method] = cm[0][1]
            FN[threshold_method] = cm[1][0]
            TP[threshold_method] = cm[1][1]
            precision[threshold_method] = precision_score(y_true=abnormal_label,
                                                          y_pred=np_decision[threshold_method],
                                                          pos_label=pos_label)
            recall[threshold_method] = recall_score(y_true=abnormal_label,
                                                    y_pred=np_decision[threshold_method],
                                                    pos_label=pos_label)
            f1[threshold_method] = f1_score(y_true=abnormal_label,
                                            y_pred=np_decision[threshold_method],
                                            pos_label=pos_label)
        fpr, tpr, _ = roc_curve(y_true=abnormal_label, y_score=np.nan_to_num(error),
                                pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        pre, re, _ = precision_recall_curve(y_true=abnormal_label,
                                            probas_pred=np.nan_to_num(error),
                                            pos_label=pos_label)
        pr_auc = auc(re, pre)
        metrics_result = MetricsResult(
            TN=TN, FP=FP, FN=FN, TP=TP, precision=precision, recall=recall, fbeta=f1,
            pr_auc=pr_auc, roc_auc=roc_auc,
            best_TN=cae_output.best_TN, best_FP=cae_output.best_FP,
            best_FN=cae_output.best_FN, best_TP=cae_output.best_TP,
            best_precision=cae_output.best_precision, best_recall=cae_output.best_recall,
            best_fbeta=cae_output.best_fbeta, best_pr_auc=cae_output.best_pr_auc,
            best_roc_auc=cae_output.best_roc_auc, best_cks=cae_output.best_cks,
            min_valid_loss=cae_output.min_valid_loss)
        return metrics_result
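

# unroll_window_3D is imported from the repository utilities and maps overlapping
# per-window reconstructions of shape (n_windows, rolling_size, x_dim) back to a single
# sequence that is then compared with the raw series. The sketch below is a hypothetical
# stand-in showing one plausible aggregation (averaging the overlapping positions); the
# real helper may aggregate differently, and the [::-1] applied to its result above
# suggests it returns the sequence in reverse order.
def _unroll_window_3d_sketch(windows):
    n, w, d = windows.shape
    total = np.zeros((n + w - 1, d))
    counts = np.zeros((n + w - 1, 1))
    for i in range(n):
        # window i covers positions i .. i + w - 1 of the reconstructed sequence
        total[i:i + w] += windows[i]
        counts[i:i + w] += 1
    return total / counts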