def __init__(self, path, dist_type=DistType.Euclidian, eps=0.01, recall=0.7):
        """Load the graphs found under ``path`` and compute their beta matrix.

        The constructor builds a ``Graphs`` collection, reads its labels,
        normalises the features with ``log_norm``, selects feature pairs with
        ``PearsonFeaturePicker`` and stores the resulting ``LinearContext``
        beta matrix on ``self.beta_matrix``.

        :param path: stored as ``files_path`` and forwarded to ``Graphs``.
        :param dist_type: stored on ``self.dit_type`` (attribute name sic).
        :param eps: stored on ``self.eps``.
        :param recall: stored on ``self.recall``.
        """
        params = {
            'database': 'Refael',
            'files_path': path,
            'date_format': None,  # Twitter
            'directed': True,
            'max_connected': False,
            'logger_name': "logger",
            'ftr_pairs': 300,
            'identical_bar': 0.9,
            'context_beta': 1,
        }
        self._params = params

        self.eps = eps
        self.recall = recall
        # NOTE(review): the attribute is spelled ``dit_type``; kept unchanged
        # because code outside this method may read it under that name.
        self.dit_type = dist_type

        self._logger = PrintLogger(params['logger_name'])
        self._graphs = Graphs(
            params['database'],
            files_path=params['files_path'],
            logger=self._logger,
            features_meta=ANOMALY_DETECTION_FEATURES,
            directed=params['directed'],
            date_format=params['date_format'],
            largest_cc=params['max_connected'],
        )
        self._graphs.build(
            force_rebuild_ftr=REBUILD_FEATURES,
            pick_ftr=RE_PICK_FTR,
            should_zscore=False,
        )
        # Labels are read before the features are normalised.
        self.labels = self._graphs.get_labels()

        self._graphs.norm_features(log_norm)

        picker = PearsonFeaturePicker(
            self._graphs,
            size=params['ftr_pairs'],
            logger=self._logger,
            identical_bar=params['identical_bar'],
        )
        pairs = picker.best_pairs()
        context = LinearContext(self._graphs, pairs, split=params['context_beta'])
        self.beta_matrix = context.beta_matrix()
    def build(self):
        """Run the pipeline once using the values currently in ``self._params``.

        Steps: pick feature pairs, compute the beta matrix, score it with
        ``KnnScore`` (also writing the distance heat map), then build a
        ``ContextAnomalyPicker`` and plot the anomalies it finds.
        """
        params = self._params

        picker = PearsonFeaturePicker(
            self._graphs,
            size=params['ftr_pairs'],
            logger=self._logger,
            identical_bar=params['identical_bar'],
        )
        pairs = picker.best_pairs()

        context = LinearContext(self._graphs, pairs, split=params['context_beta'])
        betas = context.beta_matrix()

        knn = KnnScore(
            betas,
            params['KNN_k'],
            params['database'],
            context_split=params['context_beta'],
        )
        knn.dist_heat_map(params['dist_mat_file_name'])

        # NOTE(review): the picker reads 'context_split' while the scorer above
        # is given 'context_beta' -- confirm the two keys are both intended.
        picker_of_anomalies = ContextAnomalyPicker(
            self._graphs,
            knn.score_list(),
            params['database'],
            logger=None,
            split=params['context_split'],
            bar=params['context_bar'],
        )
        picker_of_anomalies.build()
        picker_of_anomalies.plot_anomalies(
            params['anomalies_file_name'],
            truth=self._ground_truth,
            info_text=self.param_to_str(),
        )
Beispiel #3
0
    def _build(self):
        """Sweep a parameter grid and pick anomalies for every combination.

        For each combination of log flag, vector type, number of feature
        pairs, identical bar and correlation window, the chosen value is
        written into ``self._params``, a beta matrix is computed and passed to
        ``self._pick_anomalies``.
        """
        for lg in [True, False]:
            self._params.log = lg
            for vec_type in ["mean_regression", "regression"]:  # motif_ratio
                self._params.vec_type = vec_type
                # Bug fix: this assignment used to end with a stray comma,
                # which stored a 1-tuple wrapping the feature definition
                # instead of the definition itself.
                self.features = (ANOMALY_DETECTION_FEATURES
                                 if vec_type == "regression"
                                 else MOTIF_FEATURES)

                if vec_type in ("regression", "mean_regression"):
                    mx_dict = self._calc_matrix()
                    concat_mx = np.vstack(list(mx_dict.values()))
                    # "regression" and "mean_regression" differ only in the
                    # context class used to build the beta matrix.
                    context_cls = (LinearContext if vec_type == "regression"
                                   else LinearMeanContext)

                    # Alternative grids left by the author:
                    # [1, 2, 3, 4, 5, 10], [5, 10, 15, 20, 25, 30, 40, 45, 50]
                    for ftr_pairs in [3, 4, 5]:
                        self._params.ftr_pairs = ftr_pairs
                        # Alternative grid: [0.7, 0.8, 0.9, 0.95, 0.99]
                        for identical in [0.99]:
                            self._params.identical_bar = identical

                            pearson_picker = PearsonFeaturePicker(
                                concat_mx,
                                size=ftr_pairs,
                                logger=self._logger,
                                identical_bar=identical)

                            max_window = min(
                                100, self._temporal_graph.number_of_graphs())
                            for win in range(25, max_window, 25):
                                self._params.window_correlation = win
                                best_pairs = pearson_picker.best_pairs()
                                if best_pairs is None:
                                    # No pairs were selected; skip this window.
                                    continue
                                beta = context_cls(self._temporal_graph,
                                                   mx_dict,
                                                   best_pairs,
                                                   window_size=win)
                                self._pick_anomalies(beta.beta_matrix())

                elif vec_type == "motif_ratio":
                    # Not reachable with the vec_type list above; kept so the
                    # option can be re-enabled by adding it to that list.
                    self._graph_to_vec = self._calc_vec()
                    beta_matrix = np.vstack([
                        self._graph_to_vec[name]
                        for name in self._temporal_graph.graph_names()
                    ])
                    self._pick_anomalies(beta_matrix)
Beispiel #4
0
    def __init__(self, path, eps=0.01, recall=0.7):
        """Load the graphs under ``path``, get their beta matrix and learn on it.

        The beta matrix is read from the pickle file ``BETA_PKL_P`` when that
        file exists; otherwise it is computed with ``LinearContext`` and
        written to that file. The matrix is then converted to a data frame,
        passed through ``_pca_df`` and ``_learn_RF``, and the result is
        plotted with ``plot_learning_df``.

        :param path: stored as ``files_path`` and forwarded to ``Graphs``.
        :param eps: accepted but not used inside this constructor.
        :param recall: accepted but not used inside this constructor.
        """
        self._params = {
            'database': 'Refael',
            'files_path': path,
            'date_format': None,  # Twitter
            'directed': True,
            'max_connected': False,
            'logger_name': "logger",
            'ftr_pairs': 300,
            'identical_bar': 0.95,
            'context_beta': 1,
        }

        self._logger = PrintLogger(self._params['logger_name'])
        self._graphs = Graphs(self._params['database'],
                              files_path=self._params['files_path'],
                              logger=self._logger,
                              features_meta=ANOMALY_DETECTION_FEATURES,
                              directed=self._params['directed'],
                              date_format=self._params['date_format'],
                              largest_cc=self._params['max_connected'])
        self._graphs.build(force_rebuild_ftr=REBUILD_FEATURES,
                           pick_ftr=RE_PICK_FTR,
                           should_zscore=False)

        # normalize features ---------------------------------
        self._graphs.norm_features(log_norm)

        # labels
        self.labels = self._graphs.get_labels()

        # The pairs are always computed, even on a cache hit, because they
        # also serve as the column header of the beta data frame.
        pearson_picker = PearsonFeaturePicker(
            self._graphs,
            size=self._params['ftr_pairs'],
            logger=self._logger,
            identical_bar=self._params['identical_bar'])
        best_pairs = pearson_picker.best_pairs()
        self._pairs_header = best_pairs

        # NOTE(review): the cache file is not keyed by ``path`` or by the
        # parameters above, so a stale file would be reused -- confirm.
        if os.path.exists(BETA_PKL_P):
            # pickle can execute arbitrary code on load; BETA_PKL_P must be a
            # trusted, locally produced file.
            with open(BETA_PKL_P, "rb") as pkl_file:
                self._beta_matrix = pickle.load(pkl_file)
        else:
            beta = LinearContext(self._graphs,
                                 best_pairs,
                                 split=self._params['context_beta'])
            self._beta_matrix = beta.beta_matrix()
            # The context manager guarantees the cache is flushed and closed.
            with open(BETA_PKL_P, "wb") as pkl_file:
                pickle.dump(self._beta_matrix, pkl_file)

        self._beta_df = self._beta_matrix_to_df(header=self._pairs_header)
        # The full beta data frame is used as the "best" one.
        self._best_beta_df = self._beta_df
        res_df = self._learn_RF(
            self._pca_df(self._best_beta_df, graph_data=True, min_nodes=10))
        self.plot_learning_df(res_df)
 def _calc_curr_time(self):
     """Compute the beta matrix for ``self._timed_graph``.

     :return: a 5-tuple of (beta matrix, selected feature pairs, the graph's
         ``nodes_count_list()``, its ``edges_count_list()``, its
         ``get_labels()``).
     """
     graph = self._timed_graph
     params = self._params

     picker = PearsonFeaturePicker(
         graph,
         size=params['ftr_pairs'],
         logger=self._logger,
         identical_bar=params['identical_bar'],
     )
     pairs = picker.best_pairs()
     context = LinearContext(graph, pairs, split=params['context_beta'])

     return (
         context.beta_matrix(),
         pairs,
         graph.nodes_count_list(),
         graph.edges_count_list(),
         graph.get_labels(),
     )
Beispiel #6
0
    def build_manipulations(self):
        """Sweep a parameter grid, scoring and plotting anomalies for each point.

        Iterates over the number of feature pairs, the identical bar, the
        context split and the KNN ``k``. Each chosen value is written into
        ``self._params`` before the corresponding stage runs, and every
        combination ends with ``plot_anomalies`` being called.
        """
        for ftr_num in range(25, 32, 5):
            self._params['ftr_pairs'] = ftr_num
            for identical in range(90, 100, 1):
                # Bug fix: convert the percentage to a fraction (0.90..0.99).
                # The factor used to be 0.10, which produced 9.0..9.9 -- out of
                # line with the 0.9-0.99 bars used elsewhere in this code.
                self._params['identical_bar'] = round(identical * 0.01, 2)
                pearson_picker = PearsonFeaturePicker(
                    self._graphs,
                    size=self._params['ftr_pairs'],
                    logger=self._logger,
                    identical_bar=self._params['identical_bar'])
                best_pairs = pearson_picker.best_pairs()
                for context in range(4, 7, 2):
                    self._params['context_beta'] = context
                    beta = LinearContext(self._graphs,
                                         best_pairs,
                                         split=self._params['context_beta'])
                    beta_matrix = beta.beta_matrix()
                    # k runs from 5 up to (exclusive) the smaller of 100 and
                    # number_of_graphs / context.
                    max_k = min(
                        100,
                        int(self._graphs.number_of_graphs() / context))
                    for k in range(5, max_k):
                        self._params['KNN_k'] = k
                        score = KnnScore(
                            beta_matrix,
                            self._params['KNN_k'],
                            self._params['database'],
                            context_split=self._params['context_beta'])
                        score.dist_heat_map(self._params['dist_mat_file_name'])
                        # NOTE(review): the picker reads 'context_split', which
                        # this sweep never sets (it sets 'context_beta') --
                        # confirm that is intended.
                        anomaly_picker = ContextAnomalyPicker(
                            self._graphs,
                            score.score_list(),
                            self._params['database'],
                            logger=None,
                            split=self._params['context_split'],
                            bar=self._params['context_bar'])

                        anomaly_picker.build()
                        anomaly_picker.plot_anomalies(
                            self._params['anomalies_file_name'],
                            truth=self._ground_truth,
                            info_text=self.param_to_str())
Beispiel #7
0
    def plot_correlations(self):
        """Show one figure per selected feature pair with its regression line.

        For every pair ``(i, j)`` returned by ``PearsonFeaturePicker``, column
        ``j`` of the stacked feature matrix is regressed on column ``i``. The
        figure shows the scatter of the two columns together with the fitted
        line. Returns without plotting when no pairs were selected.
        """
        from sklearn import linear_model
        mx_dict = self._calc_matrix()
        concat_mx = np.vstack(list(mx_dict.values()))
        pearson_picker = PearsonFeaturePicker(
            concat_mx,
            size=self._params.ftr_pairs,
            logger=self._logger,
            identical_bar=self._params.identical_bar)
        best_pairs = pearson_picker.best_pairs()
        if best_pairs is None:
            # Other callers of this picker treat None as "no pairs found".
            return

        # x-axis tick overrides: position -> graph name (same for every plot).
        label_overrides = {
            idx: graph_name
            for idx, graph_name in enumerate(
                self._temporal_graph.graph_names())
        }

        for i, j, _ in best_pairs:
            col_i = concat_mx[:, i]
            col_j = concat_mx[:, j]
            # The columns are passed as-is: the previous double transpose
            # (np.transpose(col.T)) was an identity operation.
            reg = linear_model.LinearRegression().fit(col_i, col_j)
            # coef_ / intercept_ may be arrays; reduce them to plain floats so
            # the line is drawn from scalar y values.
            slope = float(np.ravel(reg.coef_)[0])
            intercept = float(np.ravel(reg.intercept_)[0])

            # assumes concat_mx is a 2-D np.matrix, so .T.tolist()[0] yields
            # the column as a flat list -- TODO confirm
            ftr_i = col_i.T.tolist()[0]
            ftr_j = col_j.T.tolist()[0]

            # NOTE(review): the axis labels are hard-coded and do not name the
            # plotted features -- confirm they are intended.
            p = figure(plot_width=600,
                       plot_height=250,
                       title=self._data_name + " regression " + str((i, j)),
                       x_axis_label="time",
                       y_axis_label="nodes_count")  # create figure

            # Bug fix: y used to be evaluated on range(10) while x covered
            # 0..max(ftr_i), so the two sequences had different lengths.
            xs = list(range(int(max(ftr_i)) + 1))
            ys = [slope * x + intercept for x in xs]
            p.line(xs, ys, line_color="blue")  # regression line

            p.scatter(list(ftr_i), list(ftr_j))  # observed feature values
            p.xaxis.major_label_overrides = label_overrides
            p.legend.location = "top_left"
            show(p)
 def _build_first_method(self):
     """Run the pipeline once: beta matrix -> score -> anomaly picking -> plot.

     The scorer is chosen by ``self._params.score_type``: ``"knn"`` uses
     ``KnnScore``, ``"gmm"`` uses ``GmmScore`` and any other value falls back
     to ``LocalOutlierFactorScore``.
     """
     matrices = self._calc_matrix()
     stacked = np.vstack(list(matrices.values()))
     params = self._params

     picker = PearsonFeaturePicker(
         stacked,
         size=params.ftr_pairs,
         logger=self._logger,
         identical_bar=params.identical_bar,
     )
     pairs = picker.best_pairs()
     context = LinearContext(
         self._temporal_graph,
         matrices,
         pairs,
         window_size=params.window_correlation,
     )
     betas = context.beta_matrix()

     score_type = params.score_type
     if score_type == "knn":
         scorer = KnnScore(
             betas,
             params.KNN_k,
             self._data_name,
             window_size=params.window_score,
         )
     elif score_type == "gmm":
         scorer = GmmScore(
             betas,
             self._data_name,
             window_size=params.window_score,
             n_components=params.n_components,
         )
     else:
         # Catch-all branch (the original comment names "local_outlier").
         scorer = LocalOutlierFactorScore(
             betas,
             self._data_name,
             window_size=params.window_score,
             n_neighbors=params.n_neighbors,
         )

     anomalies = SimpleAnomalyPicker(
         self._temporal_graph,
         scorer.score_list(),
         self._data_name,
         num_anomalies=params.n_outliers,
     )
     anomalies.build()
     anomalies.plot_anomalies_bokeh(
         params.anomalies_file_name,
         truth=self._ground_truth,
         info_text=params.tostring(),
     )