def __init__(self, path, dist_type=DistType.Euclidian, eps=0.01, recall=0.7):
    """Load the graph database from *path*, normalize features and
    precompute the beta matrix used for anomaly scoring.

    :param path: directory containing the input graph files
    :param dist_type: distance metric used by downstream scoring
    :param eps: epsilon threshold stored for later use
    :param recall: target recall stored for later use
    """
    self._params = {
        'database': 'Refael',
        'files_path': path,
        'date_format': None,  # Twitter
        'directed': True,
        'max_connected': False,
        'logger_name': "logger",
        'ftr_pairs': 300,
        'identical_bar': 0.9,
        'context_beta': 1,
    }
    self.eps = eps
    self.recall = recall
    # BUG FIX: the original stored the metric as 'dit_type' — an apparent
    # typo for 'dist_type'.  Keep both names so existing callers that read
    # 'dit_type' continue to work.
    self.dist_type = self.dit_type = dist_type
    self._logger = PrintLogger(self._params['logger_name'])
    self._graphs = Graphs(self._params['database'],
                          files_path=self._params['files_path'],
                          logger=self._logger,
                          features_meta=ANOMALY_DETECTION_FEATURES,
                          directed=self._params['directed'],
                          date_format=self._params['date_format'],
                          largest_cc=self._params['max_connected'])
    self._graphs.build(force_rebuild_ftr=REBUILD_FEATURES,
                       pick_ftr=RE_PICK_FTR,
                       should_zscore=False)
    self.labels = self._graphs.get_labels()
    # normalize features ---------------------------------
    self._graphs.norm_features(log_norm)
    pearson_picker = PearsonFeaturePicker(self._graphs,
                                          size=self._params['ftr_pairs'],
                                          logger=self._logger,
                                          identical_bar=self._params['identical_bar'])
    best_pairs = pearson_picker.best_pairs()
    beta = LinearContext(self._graphs, best_pairs,
                         split=self._params['context_beta'])
    self.beta_matrix = beta.beta_matrix()
def build(self):
    """Compute the beta matrix, score every graph with KNN and plot the
    picked anomalies against the ground truth."""
    picker = PearsonFeaturePicker(self._graphs,
                                  size=self._params['ftr_pairs'],
                                  logger=self._logger,
                                  identical_bar=self._params['identical_bar'])
    pairs = picker.best_pairs()
    context = LinearContext(self._graphs, pairs,
                            split=self._params['context_beta'])
    betas = context.beta_matrix()
    knn = KnnScore(betas, self._params['KNN_k'], self._params['database'],
                   context_split=self._params['context_beta'])
    knn.dist_heat_map(self._params['dist_mat_file_name'])
    anomalies = ContextAnomalyPicker(self._graphs, knn.score_list(),
                                     self._params['database'], logger=None,
                                     split=self._params['context_split'],
                                     bar=self._params['context_bar'])
    anomalies.build()
    anomalies.plot_anomalies(self._params['anomalies_file_name'],
                             truth=self._ground_truth,
                             info_text=self.param_to_str())
def _build(self):
    """Grid-search over log-normalization, vector type, feature-pair count,
    identical-bar and correlation-window settings, picking anomalies for
    each configuration via ``self._pick_anomalies``."""
    for lg in [True, False]:
        self._params.log = lg
        for vec_type in ["mean_regression", "regression"]:  # motif_ratio
            self._params.vec_type = vec_type
            # BUG FIX: the original assignment ended with a stray comma,
            # which wrapped the features object in a one-element tuple.
            self.features = (ANOMALY_DETECTION_FEATURES
                             if self._params.vec_type == "regression"
                             else MOTIF_FEATURES)
            if self._params.vec_type in ("regression", "mean_regression"):
                mx_dict = self._calc_matrix()
                concat_mx = np.vstack([mx for name, mx in mx_dict.items()])
                for ftr_pairs in [3, 4, 5]:  # [1, 2, 3, 4, 5, 10] [5, 10, 15, 20, 25, 30, 40, 45, 50]
                    self._params.ftr_pairs = ftr_pairs
                    for identical in [0.99]:  # [0.7, 0.8, 0.9, 0.95, 0.99]
                        self._params.identical_bar = identical
                        pearson_picker = PearsonFeaturePicker(
                            concat_mx, size=self._params.ftr_pairs,
                            logger=self._logger,
                            identical_bar=self._params.identical_bar)
                        # windows of 25 up to (exclusive) 100 or the number of graphs
                        for win in range(25, min(100, self._temporal_graph.number_of_graphs()), 25):
                            self._params.window_correlation = win
                            best_pairs = pearson_picker.best_pairs()
                            if best_pairs is None:
                                continue
                            if self._params.vec_type == "regression":
                                beta = LinearContext(
                                    self._temporal_graph, mx_dict, best_pairs,
                                    window_size=self._params.window_correlation)
                            else:
                                beta = LinearMeanContext(
                                    self._temporal_graph, mx_dict, best_pairs,
                                    window_size=self._params.window_correlation)
                            self._pick_anomalies(beta.beta_matrix())
            elif self._params.vec_type == "motif_ratio":
                self._graph_to_vec = self._calc_vec()
                beta_matrix = np.vstack([
                    self._graph_to_vec[name]
                    for name in self._temporal_graph.graph_names()
                ])
                self._pick_anomalies(beta_matrix)
def __init__(self, path, eps=0.01, recall=0.7):
    """Build the graph database from *path*, compute (or load a cached)
    beta matrix and run the random-forest learning pipeline.

    :param path: directory containing the input graph files
    :param eps: epsilon threshold stored for later use
    :param recall: target recall stored for later use
    """
    self._params = {
        'database': 'Refael',
        'files_path': path,
        'date_format': None,  # Twitter
        'directed': True,
        'max_connected': False,
        'logger_name': "logger",
        'ftr_pairs': 300,
        'identical_bar': 0.95,
        'context_beta': 1,
    }
    # FIX: the original silently dropped eps/recall; store them like the
    # sibling constructor does so callers can read them back.
    self.eps = eps
    self.recall = recall
    self._logger = PrintLogger(self._params['logger_name'])
    self._graphs = Graphs(self._params['database'],
                          files_path=self._params['files_path'],
                          logger=self._logger,
                          features_meta=ANOMALY_DETECTION_FEATURES,
                          directed=self._params['directed'],
                          date_format=self._params['date_format'],
                          largest_cc=self._params['max_connected'])
    self._graphs.build(force_rebuild_ftr=REBUILD_FEATURES,
                       pick_ftr=RE_PICK_FTR,
                       should_zscore=False)
    # normalize features ---------------------------------
    self._graphs.norm_features(log_norm)
    # labels
    self.labels = self._graphs.get_labels()
    pearson_picker = PearsonFeaturePicker(self._graphs,
                                          size=self._params['ftr_pairs'],
                                          logger=self._logger,
                                          identical_bar=self._params['identical_bar'])
    best_pairs = pearson_picker.best_pairs()
    self._pairs_header = best_pairs
    if os.path.exists(BETA_PKL_P):
        # BUG FIX: use context managers so the pickle file handles are
        # closed deterministically instead of leaking.
        with open(BETA_PKL_P, "rb") as pkl:
            self._beta_matrix = pickle.load(pkl)
    else:
        beta = LinearContext(self._graphs, best_pairs,
                             split=self._params['context_beta'])
        self._beta_matrix = beta.beta_matrix()
        with open(BETA_PKL_P, "wb") as pkl:
            pickle.dump(self._beta_matrix, pkl)
    self._beta_df = self._beta_matrix_to_df(header=self._pairs_header)
    self._best_beta_df = self._beta_df
    res_df = self._learn_RF(
        self._pca_df(self._best_beta_df, graph_data=True, min_nodes=10))
    self.plot_learning_df(res_df)
def _calc_curr_time(self):
    """Return the current timed graph's beta matrix together with the
    chosen feature pairs, node/edge count lists and labels."""
    graph = self._timed_graph
    picker = PearsonFeaturePicker(graph,
                                  size=self._params['ftr_pairs'],
                                  logger=self._logger,
                                  identical_bar=self._params['identical_bar'])
    pairs = picker.best_pairs()
    context = LinearContext(graph, pairs, split=self._params['context_beta'])
    return (context.beta_matrix(), pairs, graph.nodes_count_list(),
            graph.edges_count_list(), graph.get_labels())
def build_manipulations(self):
    """Sweep feature-pair count, identical-bar, context split and KNN's k,
    scoring and plotting anomalies for each combination."""
    for ftr_num in range(25, 32, 5):
        self._params['ftr_pairs'] = ftr_num
        for identical in range(90, 100, 1):
            # BUG FIX: the original multiplied by 0.10, producing bars of
            # 9.0-9.9; identical_bar is a correlation threshold in [0, 1]
            # (0.9 / 0.95 in the sibling constructors), so scale by 0.01
            # to sweep 0.90-0.99.
            self._params['identical_bar'] = round(identical * 0.01, 2)
            pearson_picker = PearsonFeaturePicker(
                self._graphs, size=self._params['ftr_pairs'],
                logger=self._logger,
                identical_bar=self._params['identical_bar'])
            best_pairs = pearson_picker.best_pairs()
            # beta = LinearRegBetaCalculator(self._graphs, best_pairs, single_c=self._params['single_c'])
            for context in range(4, 7, 2):
                self._params['context_beta'] = context
                beta = LinearContext(self._graphs, best_pairs,
                                     split=self._params['context_beta'])
                beta_matrix = beta.beta_matrix()
                for k in range(5, min(100, int(self._graphs.number_of_graphs() / context))):
                    self._params['KNN_k'] = k
                    score = KnnScore(beta_matrix, self._params['KNN_k'],
                                     self._params['database'],
                                     context_split=self._params['context_beta'])
                    # score = TestScore(beta_matrix, self._params['database'])
                    score.dist_heat_map(self._params['dist_mat_file_name'])
                    anomaly_picker = ContextAnomalyPicker(
                        self._graphs, score.score_list(),
                        self._params['database'], logger=None,
                        split=self._params['context_split'],
                        bar=self._params['context_bar'])
                    anomaly_picker.build()
                    anomaly_picker.plot_anomalies(
                        self._params['anomalies_file_name'],
                        truth=self._ground_truth,
                        info_text=self.param_to_str())
def plot_correlations(self):
    """Fit a linear regression for each best feature pair and plot the
    fitted line over a scatter of the pair's values with bokeh."""
    from sklearn import linear_model
    mx_dict = self._calc_matrix()
    concat_mx = np.vstack([mx for name, mx in mx_dict.items()])
    pearson_picker = PearsonFeaturePicker(concat_mx,
                                          size=self._params.ftr_pairs,
                                          logger=self._logger,
                                          identical_bar=self._params.identical_bar)
    best_pairs = pearson_picker.best_pairs()
    for i, j, u in best_pairs:
        # np.transpose(X.T) is X itself, so fit on the columns directly.
        reg = linear_model.LinearRegression().fit(concat_mx[:, i],
                                                  concat_mx[:, j])
        m = reg.coef_
        b = reg.intercept_
        ftr_i = concat_mx[:, i].T.tolist()[0]
        ftr_j = concat_mx[:, j].T.tolist()[0]
        p = figure(plot_width=600, plot_height=250,
                   title=self._data_name + " regression " + str((i, j)),
                   x_axis_label="time", y_axis_label="nodes_count")
        # BUG FIX: the original line had mismatched x/y lengths
        # (max(ftr_i)+1 x-values vs. 10 y-values) and the comprehension
        # shadowed the outer loop variable `i`; evaluate y at the same
        # x-values instead.
        line_x = list(range(int(max(ftr_i)) + 1))
        p.line(line_x, [m * x + b for x in line_x], line_color="blue")
        p.scatter(list(ftr_i), list(ftr_j))  # plot nodes
        p.xaxis.major_label_overrides = {
            idx: graph_name
            for idx, graph_name in enumerate(self._temporal_graph.graph_names())
        }  # time index -> graph_name
        p.legend.location = "top_left"
        show(p)
def _build_first_method(self):
    """Score every graph using correlated feature pairs (knn / gmm /
    local-outlier-factor) and plot the picked anomalies with bokeh."""
    matrices = self._calc_matrix()
    stacked = np.vstack([m for _, m in matrices.items()])
    picker = PearsonFeaturePicker(stacked,
                                  size=self._params.ftr_pairs,
                                  logger=self._logger,
                                  identical_bar=self._params.identical_bar)
    pairs = picker.best_pairs()
    context = LinearContext(self._temporal_graph, matrices, pairs,
                            window_size=self._params.window_correlation)
    betas = context.beta_matrix()
    method = self._params.score_type
    if method == "knn":
        score = KnnScore(betas, self._params.KNN_k, self._data_name,
                         window_size=self._params.window_score)
    elif method == "gmm":
        score = GmmScore(betas, self._data_name,
                         window_size=self._params.window_score,
                         n_components=self._params.n_components)
    else:  # score_type == "local_outlier"
        score = LocalOutlierFactorScore(betas, self._data_name,
                                        window_size=self._params.window_score,
                                        n_neighbors=self._params.n_neighbors)
    anomalies = SimpleAnomalyPicker(self._temporal_graph, score.score_list(),
                                    self._data_name,
                                    num_anomalies=self._params.n_outliers)
    anomalies.build()
    anomalies.plot_anomalies_bokeh(self._params.anomalies_file_name,
                                   truth=self._ground_truth,
                                   info_text=self._params.tostring())