def single_test():
    dataset = Dataset.MNIST_ODDS
    aggregator = Aggregator.AVERAGE_THRESHOLD
    threshold = 0
    X, Y = DataLoader.load(dataset)
    # Drop constant features; they carry no signal and break z-scoring.
    X = X[:, np.std(X, axis=0) != 0]
    dim = X.shape[1]
    neigh = max(10, int(np.floor(0.03 * X.shape[0])))
    ENSEMBLE_SIZE = 100
    logger.info(f"{dataset} {aggregator} {threshold}")
    roc_aucs = []
    precision_at_ns = []
    mdl = OracleAdaptive(2, dim / 4, ENSEMBLE_SIZE, aggregator, neigh,
                         kNN.NAME, Y, threshold)
    for _ in tqdm.trange(1):
        try:
            rst = mdl.compute_ensemble_components(X)
            roc_auc = mdl.compute_roc_auc(rst, Y)
            logger.info(f"Final ROC {roc_auc}")
            precision_at_n = mdl.compute_precision_at_n(rst, Y)
            logger.info(f"Precision@n {precision_at_n}")
            roc_aucs.append(roc_auc)
            precision_at_ns.append(precision_at_n)
        except Exception as e:
            logger.exception(e)
    logger.info(f"Exp Information {dataset} {aggregator} {threshold}")
    logger.info(f"Final Average ROC {np.mean(roc_aucs)}")
    logger.info(f"Final Precision@n {np.mean(precision_at_ns)}")
    logger.info("====================================================")

def test():
    X, Y = DataLoader.load(Dataset.MUSK)
    neigh = max(10, int(np.floor(0.03 * X.shape[0])))
    knn = kNN(neigh, if_normalize=False)
    rst = knn.fit(X)
    y_scores = np.array(rst)
    print(y_scores)
    roc_auc = roc_auc_score(Y, y_scores)
    print(roc_auc)

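# The experiments in this file log Precision@n alongside ROC AUC. A minimal,
# self-contained sketch of that metric, assuming it matches pyod's
# precision_n_scores (precision within the top-n ranked points, where n is
# the number of true outliers). The helper name is hypothetical.
def _precision_at_n_sketch(y_true, y_scores):
    n = int(np.sum(y_true == 1))            # number of true outliers
    top_n = np.argsort(y_scores)[::-1][:n]  # indices of the n highest scores
    return float(np.sum(y_true[top_n] == 1)) / n

# e.g. _precision_at_n_sketch(np.array([0, 1, 1, 0]),
#                             np.array([0.1, 0.9, 0.8, 0.2])) -> 1.0
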
def outliers_per_subspace():
    import json
    outputs = defaultdict(dict)
    model = 'gke'
    for dataset in [
            Dataset.GLASS, Dataset.WINE, Dataset.BREASTW, Dataset.ANNTHYROID,
            Dataset.VOWELS, Dataset.PIMA, Dataset.THYROID
    ]:
        logger.info("=" * 50)
        logger.info(f" Dataset {dataset} ")
        logger.info("=" * 50)
        _X, Y = DataLoader.load(dataset)
        outlier_num, inlier_num = np.sum(Y == 1), np.sum(Y == 0)
        feature_index = np.arange(_X.shape[1])
        if model == "knn":
            mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                      Normalize.ZSCORE)
            X_gpu_tensor = _X
        elif model == "gke":
            mdl = GKE_GPU(Normalize.ZSCORE)
            X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
        # Fit one detector per feature subspace, over all non-empty subsets.
        model_outputs = []
        for l in range(1, len(feature_index) + 1):
            for i in combinations(feature_index, l):
                model_outputs.append((i, mdl.fit(X_gpu_tensor[:, np.asarray(i)])))
        logger.info(f"Total model {len(model_outputs)}")
        for name, aggregator, threshold in [
            ("RANK", Aggregator.count_rank_threshold, 0.05),
            ("RANK", Aggregator.count_rank_threshold, 0.10),
            ("STD", Aggregator.count_std_threshold, 1),
            ("STD", Aggregator.count_std_threshold, 2)
        ]:
            logger.info(f"---------------{name}------------------------")
            outlier_num_per_subspace = []
            for selected_features, scores in model_outputs:
                y_scores = np.array(aggregator([scores, ], threshold))
                outlier_num_per_subspace.append(int(np.sum(y_scores[Y == 1])))
            outputs[f"{name}_{threshold}"][dataset] = {
                "outlier_dist": outlier_num_per_subspace,
                "outlier_total": int(outlier_num),
                "subspace_total": len(model_outputs)
            }
        # Report true outliers that no subspace flags (false negatives).
        total_score = Aggregator.count_rank_threshold(
            [scores for _, scores in model_outputs])
        for idx, label in enumerate(Y):
            if label == 1 and total_score[idx] == 0:
                print("FN Outliers", X_gpu_tensor[idx])
        print("Inliers", X_gpu_tensor[Y == 0])
    output_file = f"{model}_outliers_per_subspace.json"
    with open(output_file, "w") as w:
        w.write(f"{json.dumps(outputs)}\n")
    logger.info(f"Output file {output_file}")

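# Hedged sketch of the rank aggregator used above. The real implementation is
# sood's Aggregator.count_rank_threshold; this only mirrors how the experiments
# call it (threshold is a fraction of the point count here, an absolute top-k
# elsewhere in this repo) and is an assumption, not the authoritative code.
def _count_rank_threshold_sketch(model_outputs, threshold=0.05):
    outputs = np.asarray(model_outputs)  # (n_detectors, n_points)
    n_points = outputs.shape[1]
    # Interpret a fractional threshold as "top threshold * n_points".
    top_k = int(threshold * n_points) if threshold < 1 else int(threshold)
    top_k = max(top_k, 1)
    counts = np.zeros(n_points, dtype=int)
    for scores in outputs:
        counts[np.argsort(scores)[::-1][:top_k]] += 1  # flag each detector's top-k
    return counts
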
def compare_auc():
    import json
    outputs = defaultdict(dict)
    # model_name = "knn"
    model_name = "gke"
    for dataset in [
            Dataset.VOWELS, Dataset.WINE, Dataset.BREASTW, Dataset.ANNTHYROID,
            Dataset.GLASS, Dataset.PIMA, Dataset.THYROID
    ]:
        logger.info("=" * 50)
        logger.info(f" Dataset {dataset} ")
        logger.info("=" * 50)
        _X, Y = DataLoader.load(dataset)
        feature_index = np.arange(_X.shape[1])
        if model_name == "knn":
            X_gpu_tensor = _X
            mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                      Normalize.ZSCORE)
        elif model_name == "gke":
            X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
            mdl = GKE_GPU(Normalize.ZSCORE)
        model_outputs = []
        for l in range(1, len(feature_index) + 1):
            for i in combinations(feature_index, l):
                model_outputs.append(mdl.fit(X_gpu_tensor[:, np.asarray(i)]))
        logger.info(f"Total model {len(model_outputs)}")
        for name, aggregator, threshold in [
            ("RANK", Aggregator.count_rank_threshold, 0.05),
            ("RANK", Aggregator.count_rank_threshold, 0.10),
            ("STD", Aggregator.count_std_threshold, 1),
            ("STD", Aggregator.count_std_threshold, 2),
            ("AVG", Aggregator.average, None),
            ("AVG", Aggregator.average_threshold, 1),
            ("AVG", Aggregator.average_threshold, 2),
        ]:
            if threshold is not None:
                y_scores = np.array(aggregator(model_outputs, threshold))
            else:
                y_scores = np.array(aggregator(model_outputs))
            roc = roc_auc_score(Y, y_scores)
            precision = precision_n_scores(Y, y_scores)
            logger.info(f"ROC of {name}-{threshold} {roc} Precision {precision}")
            outputs[dataset][f"{name}_{threshold}"] = {
                "roc": roc,
                "precision": precision
            }
    output_file = f"{model_name}_performance.json"
    with open(output_file, "w") as w:
        w.write(f"{json.dumps(outputs)}\n")
    logger.info(f"Output file {output_file}")

def load_model_and_data(dataset, model):
    logger.info("=" * 50)
    logger.info(f" Dataset {dataset} ")
    logger.info("=" * 50)
    _X, Y = DataLoader.load(dataset)
    outlier_num = int(np.sum(Y == 1))
    feature_index = np.array(range(_X.shape[1]))
    if model == "knn":
        mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                  Normalize.ZSCORE)
        X_gpu_tensor = _X
    elif model == "gke":
        mdl = GKE_GPU(Normalize.ZSCORE)
        X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
    else:
        raise ValueError(f"Model not supported {model}")
    return mdl, X_gpu_tensor, Y, outlier_num, feature_index

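# Example usage (a sketch; the experiment functions in this file inline the
# same setup instead of calling this helper):
# mdl, X_gpu_tensor, Y, outlier_num, feature_index = load_model_and_data(
#     Dataset.WINE, "gke")
# scores = mdl.fit(X_gpu_tensor[:, np.asarray((0, 1))])  # one 2-feature subspace
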
def test_greedy_threshold():
    for dataset in [
            Dataset.ARRHYTHMIA, Dataset.OPTDIGITS, Dataset.MUSK,
            Dataset.MNIST_ODDS
    ]:
        for aggregator in [
                Aggregator.COUNT_STD_THRESHOLD, Aggregator.AVERAGE_THRESHOLD,
                Aggregator.AVERAGE, Aggregator.COUNT_RANK_THRESHOLD
        ]:
            for threshold in [0, 0.5, 0.7, 0.9]:
                X, Y = DataLoader.load(dataset)
                dim = X.shape[1]
                neigh = max(10, int(np.floor(0.03 * X.shape[0])))
                ENSEMBLE_SIZE = 100
                logger.info(f"{dataset} {aggregator} {threshold}")
                roc_aucs = []
                precision_at_ns = []
                mdl = GreedyVariance(1, dim / 2, ENSEMBLE_SIZE, aggregator,
                                     neigh, kNN.NAME, Y, threshold)
                for _ in tqdm.trange(5):
                    try:
                        rst = mdl.run(X)
                        roc_auc = mdl.compute_roc_auc(rst, Y)
                        logger.info(f"Final ROC {roc_auc}")
                        precision_at_n = mdl.compute_precision_at_n(rst, Y)
                        logger.info(f"Precision@n {precision_at_n}")
                        roc_aucs.append(roc_auc)
                        precision_at_ns.append(precision_at_n)
                    except Exception as e:
                        logger.exception(e)
                logger.info(f"Exp Information {dataset} {aggregator} {threshold}")
                logger.info(f"Final Average ROC {np.mean(roc_aucs)}")
                logger.info(f"Final Precision@n {np.mean(precision_at_ns)}")
                logger.info("====================================================")

    def aggregate_components(self, model_outputs):
        if self.aggregate_method == Aggregator.COUNT_RANK_THRESHOLD:
            return Aggregator.count_rank_threshold(model_outputs, 100)
        elif self.aggregate_method == Aggregator.AVERAGE:
            return Aggregator.average(model_outputs)
        elif self.aggregate_method == Aggregator.COUNT_STD_THRESHOLD:
            return Aggregator.count_std_threshold(model_outputs, 2)
        elif self.aggregate_method == Aggregator.AVERAGE_THRESHOLD:
            return Aggregator.average_threshold(model_outputs, 2)


if __name__ == '__main__':
    from sood.data_process.data_loader import Dataset, DataLoader

    ENSEMBLE_SIZE = 100
    EXP_NUM = 1
    X, Y = DataLoader.load(Dataset.MNIST_ODDS)
    neigh = max(10, int(np.floor(0.03 * X.shape[0])))
    # X = X[:, np.std(X, axis=0) != 0]
    dim = X.shape[1]
    for start, end in [(2, int(dim / 4))]:
        fb = Uniform(start, end, ENSEMBLE_SIZE, Aggregator.AVERAGE, neigh,
                     kNN.NAME)
        start_ts = time.time()
        roc_aucs = []
        precision_at_ns = []
        for i in range(EXP_NUM):
            rst = fb.run(X)

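# Hedged sketches of the averaging/std aggregators dispatched above. The real
# definitions live in sood's Aggregator class; these only mirror the call
# shapes (a list of per-detector score arrays in, one value per point out)
# and are assumptions, not the library's code.
def _average_sketch(model_outputs):
    # Mean score per point across all detectors.
    return np.mean(np.asarray(model_outputs), axis=0)

def _count_std_threshold_sketch(model_outputs, t):
    # Count, per point, the detectors scoring it more than t standard
    # deviations above that detector's own mean.
    outputs = np.asarray(model_outputs)
    cut = outputs.mean(axis=1, keepdims=True) + t * outputs.std(axis=1, keepdims=True)
    return (outputs > cut).sum(axis=0)
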
def outlier_correlation_subspace():
    import json
    outputs = defaultdict(dict)
    model = 'gke'
    for dataset in [
            Dataset.VOWELS, Dataset.WINE, Dataset.BREASTW, Dataset.ANNTHYROID,
            Dataset.GLASS, Dataset.PIMA, Dataset.THYROID
    ]:
        logger.info("=" * 50)
        logger.info(f" Dataset {dataset} ")
        logger.info("=" * 50)
        _X, Y = DataLoader.load(dataset)
        outlier_num = np.sum(Y == 1)
        feature_index = np.array(range(_X.shape[1]))
        if model == "knn":
            mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                      Normalize.ZSCORE)
            X_gpu_tensor = _X
        elif model == "gke":
            mdl = GKE_GPU(Normalize.ZSCORE)
            X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
        model_outputs = []
        subspace_idx_to_features = []
        for l in range(1, len(feature_index) + 1):
            for i in combinations(feature_index, l):
                model_outputs.append(mdl.fit(X_gpu_tensor[:, np.asarray(i)]))
                subspace_idx_to_features.append([int(j) for j in i])
        logger.info(f"Total model {len(model_outputs)}")
        for name, aggregator, threshold in [
            ("RANK", Aggregator.count_rank_threshold, 0.05),
            ("RANK", Aggregator.count_rank_threshold, 0.10),
            ("STD", Aggregator.count_std_threshold, 1),
            ("STD", Aggregator.count_std_threshold, 2)
        ]:
            # Map each detected true outlier to the subspaces that flag it.
            outliers_to_subspaces = defaultdict(set)
            subspace_to_outlier = {}
            for subspace_id, model_output in enumerate(model_outputs):
                detected_outliers = {
                    point_idx
                    for point_idx, if_outlier in enumerate(
                        aggregator([model_output, ], threshold))
                    if if_outlier == 1 and Y[point_idx] == 1
                }
                subspace_to_outlier[subspace_id] = detected_outliers
                for detected_outlier in detected_outliers:
                    outliers_to_subspaces[detected_outlier].add(subspace_id)
            _subspace_to_outlier = {
                i: copy.deepcopy(j) for i, j in subspace_to_outlier.items()
            }
            not_covered_outliers = {
                i for i, subspaces in outliers_to_subspaces.items()
                if len(subspaces) > 0
            }
            not_covered_outliers_num = len(not_covered_outliers)
            logger.info(
                f"Detected outliers {len(not_covered_outliers)}/{outlier_num}")
            # Greedy set cover: repeatedly take the subspace that covers the
            # most not-yet-covered outliers.
            selected_subspaces = []
            while len(not_covered_outliers) > 0:
                selected_subspace_id, covered_outliers = sorted(
                    subspace_to_outlier.items(),
                    key=lambda x: len(x[1]),
                    reverse=True)[0]
                not_covered_outliers = not_covered_outliers - covered_outliers
                subspace_to_outlier = {
                    i: (j - covered_outliers)
                    for i, j in subspace_to_outlier.items()
                }
                selected_subspaces.append(selected_subspace_id)
            for i in selected_subspaces:
                print(f"Features {subspace_idx_to_features[i]} "
                      f"Outliers {len(_subspace_to_outlier[i])}")
            print(f"{len(selected_subspaces)}/{len(model_outputs)}")
            outputs[f"{name}_{threshold}"][dataset] = {
                "select_subspace":
                    [(subspace_idx_to_features[i], list(_subspace_to_outlier[i]))
                     for i in selected_subspaces],
                "outliers": not_covered_outliers_num,
                "total_subspace": len(model_outputs),
                "total_outliers": int(outlier_num),
                "dimension": len(feature_index)
            }
    output_file = f"{model}_outliers_correlation_subspace.json"
    with open(output_file, "w") as w:
        w.write(f"{json.dumps(outputs)}\n")
    logger.info(f"Output file {output_file}")

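# The while-loop above is a textbook greedy set cover. A standalone sketch of
# the same idea (hypothetical helper, not part of sood):
def _greedy_cover_sketch(subspace_to_outlier):
    """subspace_to_outlier: dict of subspace id -> set of detected outliers."""
    remaining = set().union(*subspace_to_outlier.values())
    coverage = {k: set(v) for k, v in subspace_to_outlier.items()}
    selected = []
    while remaining:
        # Take the subspace covering the most still-uncovered outliers.
        best = max(coverage, key=lambda k: len(coverage[k]))
        covered = coverage.pop(best)
        remaining -= covered
        coverage = {k: v - covered for k, v in coverage.items()}
        selected.append(best)
    return selected

# e.g. _greedy_cover_sketch({0: {1, 2}, 1: {2, 3}, 2: {3}}) -> [0, 1]
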
    def compute_ensemble_components(self, data_array):
        detector_list = []
        feature_index = np.arange(data_array.shape[1])
        for i in range(self.ensemble_size):
            # Randomly sample a feature size for this detector.
            feature_size = np.random.randint(self.dim_start, self.dim_end)
            # Randomly select features. Note: np.random.choice samples with
            # replacement by default, so a subspace may repeat features.
            selected_features = np.random.choice(feature_index, feature_size)
            detector_list.append(
                kNN_LSCP(neighbor_size=self.neighbor,
                         selected_features=selected_features))
        clf = LSCP(detector_list)
        clf.fit(data_array)
        score = clf.decision_scores_
        return [score, ]

    def aggregate_components(self, model_outputs):
        return model_outputs[0]


if __name__ == '__main__':
    X, Y = DataLoader.load(Dataset.OPTDIGITS)
    dim = X.shape[1]
    neigh = max(10, int(np.floor(0.03 * X.shape[0])))
    ENSEMBLE_SIZE = 100
    mdl = Lscp(1, dim / 2, ENSEMBLE_SIZE, neigh)
    rst = mdl.run(X)
    roc_auc = mdl.compute_roc_auc(rst, Y)
    print(f"Final ROC {roc_auc}")

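# Minimal standalone pyod LSCP usage for reference (assumes pyod is installed;
# the base-detector choice and neighbor counts here are illustrative, not the
# configuration used above):
# from pyod.models.lscp import LSCP
# from pyod.models.knn import KNN
# _clf = LSCP([KNN(n_neighbors=k) for k in (5, 10, 15)])
# _clf.fit(X)                      # X: (n_samples, n_features) array
# _scores = _clf.decision_scores_  # per-point outlier scores on training data
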
                    except Exception as e:
                        logger.exception(e)
                logger.info(f"Exp Information {dataset} {aggregator} {threshold}")
                logger.info(f"Final Average ROC {np.mean(roc_aucs)}")
                logger.info(f"Final Precision@n {np.mean(precision_at_ns)}")
                logger.info("====================================================")


if __name__ == '__main__':
    dataset = Dataset.MNIST_ODDS
    aggregator = Aggregator.COUNT_STD_THRESHOLD
    threshold = 0
    X, Y = DataLoader.load(dataset)
    dim = X.shape[1]
    neigh = max(10, int(np.floor(0.03 * X.shape[0])))
    ENSEMBLE_SIZE = 100
    logger.info(f"{dataset} {aggregator} {threshold}")
    roc_aucs = []
    precision_at_ns = []
    mdl = GreedyVariance(1, dim / 2, ENSEMBLE_SIZE, aggregator, neigh,
                         kNN.NAME, Y, threshold)
    for _ in tqdm.trange(1):
        try:
            rst = mdl.run(X)
            roc_auc = mdl.compute_roc_auc(rst, Y)
            logger.info(f"Final ROC {roc_auc}")
            precision_at_n = mdl.compute_precision_at_n(rst, Y)
            logger.info(f"Precision@n {precision_at_n}")
            roc_aucs.append(roc_auc)
            precision_at_ns.append(precision_at_n)
        except Exception as e:
            logger.exception(e)

def batch_test():
    import json
    path_manager = PathManager()
    ENSEMBLE_SIZE = 100
    for dataset in [
            Dataset.ARRHYTHMIA, Dataset.MUSK, Dataset.MNIST_ODDS,
            Dataset.OPTDIGITS
    ]:
        for aggregator in [
                Aggregator.AVERAGE, Aggregator.AVERAGE_THRESHOLD,
                Aggregator.COUNT_STD_THRESHOLD, Aggregator.COUNT_RANK_THRESHOLD
        ]:
            for base_model in [kNN.NAME, ]:
                # =======================================================
                # Model
                output_path = path_manager.get_batch_test_model_output(
                    FB.NAME, aggregator, base_model, "DEFAULT", dataset)
                # =======================================================
                with open(output_path, "w") as w:
                    for threshold in [0, ]:
                        X, Y = DataLoader.load(dataset)
                        dim = X.shape[1]
                        neigh = max(10, int(np.floor(0.03 * X.shape[0])))
                        logger.info(f"{dataset} {aggregator} {threshold}")
                        roc_aucs = []
                        precision_at_ns = []
                        # ===============================================
                        # Model
                        mdl = FB(2, dim / 4, ENSEMBLE_SIZE, aggregator, neigh,
                                 base_model, Y, threshold)
                        # ===============================================
                        for _ in tqdm.trange(5):
                            try:
                                # Throws if no satisfying subspaces are found.
                                rst = mdl.run(X)
                                roc_auc = mdl.compute_roc_auc(rst, Y)
                                logger.info(f"Final ROC {roc_auc}")
                                precision_at_n = mdl.compute_precision_at_n(rst, Y)
                                logger.info(f"Precision@n {precision_at_n}")
                                roc_aucs.append(roc_auc)
                                precision_at_ns.append(precision_at_n)
                            except Exception as e:
                                logger.exception(e)
                        logger.info(
                            f"Exp Information {dataset} {aggregator} {threshold}")
                        logger.info(f"Final Average ROC {np.mean(roc_aucs)}")
                        logger.info(f"Final Precision@n {np.mean(precision_at_ns)}")
                        logger.info("====================================================")
                        output = {
                            Consts.DATA: dataset,
                            Consts.ROC_AUC: np.mean(roc_aucs),
                            Consts.PRECISION_A_N: np.mean(precision_at_ns),
                            Consts.AGGREGATE: aggregator,
                            Consts.BASE_MODEL: base_model,
                            Consts.START_DIM: 2,
                            # Note: recorded as the fraction 1/4 of dim, while
                            # START_DIM is recorded as an absolute size.
                            Consts.END_DIM: 1 / 4,
                            Consts.ENSEMBLE_SIZE: ENSEMBLE_SIZE
                        }
                        w.write(f"{json.dumps(output)}\n")
                logger.info(f"Output file is {output_path}")

def experiment(model, dim_boundary, threshold):
    import json
    path_manager = PathManager()
    ENSEMBLE_SIZE = 100
    if model == "fb":
        Model = FB
    elif model == "oracle":
        Model = OracleAdaptive
    else:
        raise Exception(f"Model not supported {model}")
    threshold = float(threshold)
    for dataset in [
            Dataset.OPTDIGITS, Dataset.MNIST_ODDS, Dataset.MUSK,
            Dataset.ARRHYTHMIA, Dataset.AD, Dataset.AID362, Dataset.BANK,
            Dataset.PROB, Dataset.U2R
    ]:
        for aggregator in [Aggregator.COUNT_RANK_THRESHOLD, ]:
            for base_model in [kNN.NAME, ]:
                X, Y = DataLoader.load(dataset)
                dim = X.shape[1]
                neigh = max(10, int(np.floor(0.03 * X.shape[0])))
                logger.info(f"{dataset} {aggregator} {threshold}")
                roc_aucs = []
                precision_at_ns = []
                # =======================================================
                # Model
                if dim_boundary == "high":
                    start_dim = dim / 2
                    end_dim = dim
                else:
                    start_dim = 2
                    end_dim = dim / 4
                mdl = Model(start_dim, end_dim, ENSEMBLE_SIZE, aggregator,
                            neigh, base_model, Y, threshold)
                output_path = path_manager.get_batch_test_model_output(
                    Model.NAME, aggregator, base_model, "DEFAULT", dataset,
                    start_dim, end_dim)
                logger.info(f"Output File {output_path}")
                # =======================================================
                for _ in tqdm.trange(5):
                    try:
                        # Throws if no satisfying subspaces are found.
                        rst = mdl.run(X)
                        roc_auc = mdl.compute_roc_auc(rst, Y)
                        logger.info(f"Final ROC {roc_auc}")
                        precision_at_n = mdl.compute_precision_at_n(rst, Y)
                        logger.info(f"Precision@n {precision_at_n}")
                        roc_aucs.append(roc_auc)
                        precision_at_ns.append(precision_at_n)
                    except Exception as e:
                        logger.exception(e)
                logger.info(f"Exp Information {dataset} {aggregator} {threshold}")
                logger.info(f"Final Average ROC {np.mean(roc_aucs)}")
                logger.info(f"Final Precision@n {np.mean(precision_at_ns)}")
                logger.info("====================================================")
                output = {
                    Consts.DATA: dataset,
                    Consts.ROC_AUC: np.mean(roc_aucs),
                    Consts.PRECISION_A_N: np.mean(precision_at_ns),
                    Consts.AGGREGATE: aggregator,
                    Consts.BASE_MODEL: base_model,
                    Consts.START_DIM: start_dim,
                    Consts.END_DIM: end_dim,
                    Consts.ENSEMBLE_SIZE: ENSEMBLE_SIZE
                }
                with open(output_path, "w") as w:
                    w.write(f"{json.dumps(output)}\n")
                logger.info(f"Output file is {output_path}")

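# Example invocations (argument values are illustrative):
# experiment("fb", "high", "0.1")     # FB with subspace sizes in [dim/2, dim]
# experiment("oracle", "low", "0.1")  # OracleAdaptive with sizes in [2, dim/4]
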
def subspace_count_per_point():
    import json
    BIN_NUM = 10
    outputs = defaultdict(dict)
    model = 'knn'
    for dataset in [
            Dataset.VOWELS, Dataset.WINE, Dataset.BREASTW, Dataset.ANNTHYROID,
            Dataset.GLASS, Dataset.PIMA, Dataset.THYROID
    ]:
        logger.info("=" * 50)
        logger.info(f" Dataset {dataset} ")
        logger.info("=" * 50)
        _X, Y = DataLoader.load(dataset)
        outlier_num, inlier_num = np.sum(Y == 1), np.sum(Y == 0)
        feature_index = np.arange(_X.shape[1])
        if model == "knn":
            mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                      Normalize.ZSCORE)
            X_gpu_tensor = _X
        elif model == "gke":
            mdl = GKE_GPU(Normalize.ZSCORE)
            X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
        model_outputs = []
        for l in range(1, len(feature_index) + 1):
            for i in combinations(feature_index, l):
                model_outputs.append(mdl.fit(X_gpu_tensor[:, np.asarray(i)]))
        logger.info(f"Total model {len(model_outputs)}")
        for name, aggregator, threshold in [
            ("RANK", Aggregator.count_rank_threshold, 0.05),
            ("RANK", Aggregator.count_rank_threshold, 0.10),
            ("STD", Aggregator.count_std_threshold, 1),
            ("STD", Aggregator.count_std_threshold, 2)
        ]:
            y_scores = np.array(aggregator(model_outputs, threshold))
            outlier_subspaces, inlier_subspaces = y_scores[Y == 1], y_scores[Y == 0]
            # Histogram over (0.1, N] so points flagged in zero subspaces fall
            # outside; they are counted separately and prepended as a first bin.
            outlier_hist, bins = np.histogram(outlier_subspaces, BIN_NUM,
                                              range=(0.1, len(model_outputs)))
            bins = [f"{i / len(model_outputs):.1f}" for i in bins]
            zero_subspaces_outlier = int(np.sum(outlier_subspaces == 0))
            print(zero_subspaces_outlier)
            print(outlier_hist)
            outlier_hist = np.insert(outlier_hist, 0, zero_subspaces_outlier)
            print(outlier_hist)
            assert np.sum(outlier_hist) == outlier_num
            inlier_hist = np.histogram(inlier_subspaces, BIN_NUM,
                                       range=(0.1, len(model_outputs)))[0]
            zero_subspaces_inlier = int(np.sum(inlier_subspaces == 0))
            print(zero_subspaces_inlier)
            print(inlier_hist)
            inlier_hist = np.insert(inlier_hist, 0, zero_subspaces_inlier)
            print(inlier_hist)
            assert np.sum(inlier_hist) == inlier_num
            outlier_hist_percent = outlier_hist / outlier_num
            inlier_hist_percent = inlier_hist / inlier_num
            logger.info(f"Outlier {outlier_num} Inlier {inlier_num}")
            logger.info(f"Outlier Median {np.median(outlier_subspaces)} "
                        f"Inlier Median {np.median(inlier_subspaces)}")
            logger.info(f"Outlier Mean {np.mean(outlier_subspaces)} "
                        f"Inlier Mean {np.mean(inlier_subspaces)}")
            logger.info(f"Bin {bins}")
            logger.info(f"Outlier dist {outlier_hist}")
            logger.info(f"Inlier dist {inlier_hist}")
            logger.info(f"Outlier dist density {outlier_hist_percent}")
            logger.info(f"Inlier dist density {inlier_hist_percent}")
            outputs[f"{name}_{threshold}"][dataset] = {
                "outlier": outlier_hist_percent.tolist(),
                "inlier": inlier_hist_percent.tolist(),
                "bin": bins,
                "outlier_mean": np.mean(outlier_subspaces),
                "inlier_mean": np.mean(inlier_subspaces),
                "outlier_median": np.median(outlier_subspaces),
                "inlier_median": np.median(inlier_subspaces),
            }
    output_file = f"{model}_subspace_count_per_point.json"
    with open(output_file, "w") as w:
        w.write(f"{json.dumps(outputs)}\n")
    logger.info(f"Output file {output_file}")

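# Worked example of the zero-bin trick above (illustrative values only):
# np.histogram with range=(0.1, max) excludes zero counts, which are tallied
# separately and prepended, so every point lands in exactly one bin.
_counts = np.array([0, 0, 3, 7, 12])
_hist, _edges = np.histogram(_counts, 2, range=(0.1, 12))  # -> [1, 2]
_zeros = int(np.sum(_counts == 0))                         # -> 2
_hist = np.insert(_hist, 0, _zeros)                        # -> [2, 1, 2]
assert _hist.sum() == len(_counts)
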
def point_count_per_dim():
    import json
    outputs = defaultdict(dict)
    model = "knn"
    for dataset in [
            Dataset.VOWELS, Dataset.WINE, Dataset.BREASTW, Dataset.ANNTHYROID,
            Dataset.GLASS, Dataset.PIMA, Dataset.THYROID
    ]:
        logger.info("=" * 50)
        logger.info(f" Dataset {dataset} ")
        logger.info("=" * 50)
        _X, Y = DataLoader.load(dataset)
        feature_index = np.arange(_X.shape[1])
        outlier_num, inlier_num = np.sum(Y == 1), np.sum(Y == 0)
        if model == "knn":
            mdl = kNN(max(10, int(np.floor(0.03 * _X.shape[0]))),
                      Normalize.ZSCORE)
            X_gpu_tensor = _X
        else:
            mdl = GKE_GPU(Normalize.ZSCORE)
            X_gpu_tensor = GKE_GPU.convert_to_tensor(_X)
        # Group the subspace detectors by dimensionality l.
        model_outputs_all = defaultdict(list)
        for l in range(1, len(feature_index) + 1):
            for i in combinations(feature_index, l):
                model_outputs_all[l].append(mdl.fit(X_gpu_tensor[:, np.asarray(i)]))
        assert len(model_outputs_all) == len(feature_index)
        for name, aggregator, threshold in [
            ("RANK", Aggregator.count_rank_threshold, 0.05),
            ("RANK", Aggregator.count_rank_threshold, 0.10),
            ("STD", Aggregator.count_std_threshold, 1),
            ("STD", Aggregator.count_std_threshold, 2)
        ]:
            dim_outlier_ratio = [0] * len(feature_index)
            dim_inlier_ratio = [0] * len(feature_index)
            for l, model_outputs in model_outputs_all.items():
                y_scores = np.array(aggregator(model_outputs, threshold))
                # Fraction of outliers/inliers flagged in at least one
                # l-dimensional subspace.
                dim_outlier_ratio[l - 1] = int(np.sum(y_scores[Y == 1] > 0)) / outlier_num
                dim_inlier_ratio[l - 1] = int(np.sum(y_scores[Y == 0] > 0)) / inlier_num
            outputs[f"{name}_{threshold}"][dataset] = {
                "outlier": dim_outlier_ratio,
                "inlier": dim_inlier_ratio,
                "feature_index": feature_index.tolist()
            }
    with open(f"{model}_point_count_per_dim.json", "w") as w:
        w.write(f"{json.dumps(outputs)}")