def _execute_for_3(self, motifs_picked):
     if self._params["load_motifs"] or os.path.exists(
             os.path.join(self._dir_path, 'motif3.pkl')):
         pkl3 = pickle.load(
             open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
         try:
             m3 = pkl3._features
             if type(m3) == dict:
                 motif3 = self._to_matrix_(m3)
             else:
                 motif3 = np.array(m3)
         except AttributeError:
             if type(pkl3) == dict:
                 motif3 = self._to_matrix(pkl3)
             else:
                 motif3 = np.array(pkl3)
         self._motif_mat = motif3
         self._motif_mat = self._motif_mat[:, motifs_picked]
         print(str(datetime.datetime.now()) + " , Calculated motifs")
         return
     motif_features = {"motif3": self._motif_features["motif3"]}
     g_ftrs = GraphFeatures(self._graph,
                            motif_features,
                            dir_path=self._dir_path)
     g_ftrs.build(should_dump=True)
     print(str(datetime.datetime.now()) + " , Calculated motifs")
     self._motif_mat = np.asarray(g_ftrs['motif3']._features)
     self._motif_mat = self._motif_mat[:, motifs_picked]
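# A minimal sketch of the kind of helper _to_matrix is assumed to be (a hypothetical
# reconstruction, not taken from the source): the motif calculators store a
# {node: [count per motif]} dictionary, and the snippets here need it as a dense
# (n_nodes x n_motifs) array.
import numpy as np

def _to_matrix(feature_dict):
    n_nodes = len(feature_dict)
    n_motifs = len(next(iter(feature_dict.values())))
    mx = np.zeros((n_nodes, n_motifs))
    for node, counts in feature_dict.items():
        mx[node, :] = counts  # assumes node labels are consecutive integers 0..n_nodes-1
    return mx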
    def _gnx_vec(self, gnx_id, gnx: nx.Graph, node_order):
        final_vec = []
        if self._deg:
            degrees = gnx.degree(gnx.nodes)
            final_vec.append(
                np.matrix([np.log(degrees[d] + 1e-3) for d in node_order]).T)
        if self._in_deg:
            degrees = gnx.in_degree(gnx.nodes)
            final_vec.append(
                np.matrix([np.log(degrees[d] + 1e-3) for d in node_order]).T)
        if self._out_deg:
            degrees = gnx.out_degree(gnx.nodes)
            final_vec.append(
                np.matrix([np.log(degrees[d] + 1e-3) for d in node_order]).T)
        if self._is_external_data and self._external_data.is_value:
            final_vec.append(
                np.matrix([
                    self._external_data.value_feature(gnx_id, d)
                    for d in node_order
                ]))
        if self._is_ftr:
            name = str(gnx_id)
            gnx_dir_path = os.path.join(self._ftr_path, name)
            if not os.path.exists(gnx_dir_path):
                os.mkdir(gnx_dir_path)
            raw_ftr = GraphFeatures(gnx,
                                    self._ftr_meta,
                                    dir_path=gnx_dir_path,
                                    is_max_connected=False,
                                    logger=PrintLogger("logger"))
            raw_ftr.build(should_dump=True)  # build features
            final_vec.append(
                FeaturesProcessor(raw_ftr).as_matrix(norm_func=log_norm))

        return np.hstack(final_vec)
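# A hedged sketch of the log_norm normalization passed to FeaturesProcessor above (an
# assumption about its behavior, not the library's definition): an element-wise log of the
# feature matrix with a small epsilon so zero entries stay finite, mirroring the
# np.log(x + 1e-3) pattern used on the degrees above.
import numpy as np

def log_norm(feature_mx):
    return np.log(np.abs(feature_mx) + 1e-3)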
 def _calc_motif3(self, gpu, device):
     if self._dir_path != "":
         if os.path.exists(os.path.join(self._dir_path, "motif3.pkl")):
             pkl3 = pickle.load(
                 open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
             if type(pkl3) == dict:
                 return pkl3
             elif type(pkl3) == list:
                 motif3 = {v: pkl3[v] for v in range(len(pkl3))}
                 return motif3
             else:
                 motif3 = pkl3._features
                 motif3dict = {v: motif3[v] for v in range(len(motif3))}
                 return motif3dict
     # If the node labels are already 0..n-1, keep the graph as is; otherwise relabel it.
     (graph, vertices_dict) = (self._graph, {v: v for v in self._graph.nodes()}) \
         if sorted(self._graph.nodes())[-1] == len(self._graph) - 1 else self._relabel_graph()
     raw_ftr = GraphFeatures(graph, {
         "motif3":
         FeatureMeta(nth_nodes_motif(3, gpu=gpu, device=device), {"m3"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True if self._dir_path != "" else False)
     motif3 = raw_ftr['motif3']._features
     motif3dict = {
         vertices_dict[v]: motif3[v]
         for v in range(len(vertices_dict))
     }
     return motif3dict
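# A minimal sketch of what _relabel_graph could look like (a hypothetical helper): the
# motif results are indexed 0..n-1, so when the node labels are not already consecutive
# integers the graph is relabeled and a mapping from new index back to the original label
# is kept, matching the (graph, vertices_dict) unpacking above.
import networkx as nx

def _relabel_graph(self):
    original = sorted(self._graph.nodes())
    new_to_old = {i: v for i, v in enumerate(original)}   # new index -> original label
    relabeled = nx.relabel_nodes(self._graph,
                                 {v: i for i, v in enumerate(original)}, copy=True)
    return relabeled, new_to_old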
    def _calc_features(self, pkl=True):
        # load dictionary if exists
        if pkl and self._ftr_pkl_name() in os.listdir(
                os.path.join(self._base_dir, 'pkl', 'ftr_by_time_dictionaries')):
            self._features_by_time, self._multi_graphs_by_time = \
                pickle.load(open(os.path.join(self._base_dir, 'pkl', 'ftr_by_time_dictionaries',
                                              self._ftr_pkl_name()), "rb"))
            return

        self._load_database()
        labels = self._database.labels
        # make directory for database
        dir_path = os.path.join(self._base_dir, 'pkl', 'graph_measures', self._params['database_full_name'])
        if self._params['database_full_name'] not in os.listdir(os.path.join(self._base_dir, 'pkl', 'graph_measures')):
            os.mkdir(dir_path)

        # calculate features
        for multi_graph in self._database.multi_graph_by_window(self._params['window_size'],
                                                                self._params['start_time']):
            ftr_tmp_dict = {}
            for name in multi_graph.graph_names():
                raw_ftr = GraphFeatures(multi_graph.get_gnx(name), NODE_FEATURES_ML, dir_path,
                                        is_max_connected=self._params['max_connected'],
                                        logger=PrintLogger(self._params['database_full_name']))
                nodes_and_edges = [multi_graph.node_count(graph_id=name), multi_graph.edge_count(graph_id=name)]
                ftr_tmp_dict[name] = (FeaturesProcessor(raw_ftr).activate_motif_ratio_vec(to_add=nodes_and_edges),
                                      labels[name])
            self._features_by_time.append(ftr_tmp_dict)

            multi_graph.suspend_logger()
            self._multi_graphs_by_time.append(multi_graph)

        pickle.dump((self._features_by_time, self._multi_graphs_by_time),
                    open(os.path.join(self._base_dir, 'pkl', 'ftr_by_time_dictionaries', self._ftr_pkl_name()), "wb"))
    def _set_index_to_ftr(self):
        gnx_name = self._temporal_graph.graph_names().__next__()
        gnx = self._temporal_graph.graphs().__next__()
        database_name = self._data_name + "_" + str(
            self._params.max_connected) + "_" + str(self._params.directed)
        gnx_path = os.path.join(self._base_dir, "pkl", "features",
                                database_name, gnx_name)
        gnx_ftr = GraphFeatures(gnx,
                                self._params.features,
                                dir_path=gnx_path,
                                logger=self._logger,
                                is_max_connected=self._params.max_connected)
        gnx_ftr.build(
            should_dump=False,
            force_build=self._params.FORCE_REBUILD_FEATURES)  # build features

        if not self._index_ftr:
            sorted_ftr = [
                f for f in sorted(gnx_ftr) if gnx_ftr[f].is_relevant()
            ]  # fix feature order (names)
            self._index_ftr = []

            for ftr in sorted_ftr:
                len_ftr = len(gnx_ftr[ftr])
                # fill list with (ftr, counter)
                self._index_ftr += self._get_motif_type(ftr, len_ftr) if ftr == 'motif3' or ftr == 'motif4' else \
                    [(ftr, i) for i in range(len_ftr)]
        return self._index_ftr
    def _calc_vec(self):
        database_name = self._params.database.DATABASE_NAME + "_" + \
                        str(self._params.max_connected) + "_" + str(self._params.directed)
        vec_pkl_path = os.path.join(self._base_dir, "pkl", "vectors", database_name + "_vectors_log_" +
                                    str(self._params.log) + ".pkl")
        if os.path.exists(vec_pkl_path):
            self._logger.info("loading pkl file - graph_vectors")
            return pickle.load(open(vec_pkl_path, "rb"))

        # create dir for database
        pkl_dir = os.path.join(self._base_dir, "pkl", "features")
        database_pkl_dir = os.path.join(pkl_dir, database_name)
        if database_name not in os.listdir(pkl_dir):
            os.mkdir(database_pkl_dir)

        gnx_to_vec = {}
        for gnx_name, gnx in zip(self._temporal_graph.graph_names(), self._temporal_graph.graphs()):
            # create dir for specific graph features
            gnx_path = os.path.join(database_pkl_dir, gnx_name)
            if gnx_name not in os.listdir(database_pkl_dir):
                os.mkdir(gnx_path)

            gnx_ftr = GraphFeatures(gnx, self._params.features, dir_path=gnx_path, logger=self._logger,
                                    is_max_connected=self._params.max_connected)
            gnx_ftr.build(should_dump=True, force_build=self._params.FORCE_REBUILD_FEATURES)  # build features
            # calc motif ratio vector
            gnx_to_vec[gnx_name] = FeaturesProcessor(gnx_ftr).activate_motif_ratio_vec(norm_func=log_norm)\
                if self._params.log else FeaturesProcessor(gnx_ftr).activate_motif_ratio_vec()

        pickle.dump(gnx_to_vec, open(vec_pkl_path, "wb"))
        return gnx_to_vec
    def collect_train_and_test_data_graph_features():
        """Collect some features from the given graph for train
		and test dataset.
		"""

        graph_features = GraphFeatures()
        graph_features.read_train_data()
        graph_features.read_test_data()

        X_train = graph_features.create_features_matrix("train")
        X_test = graph_features.create_features_matrix("test")

        print("\nTrain matrix dimensionality: ", X_train.shape)
        print("Test matrix dimensionality: ", X_test.shape)
        X_train_df = pd.DataFrame(
            data=X_train, columns=['out_degree', 'in_degree', 'avg_neig_deg'])
        X_train_df['Article'] = graph_features.train_ids
        X_train_df['Article'] = X_train_df['Article'].astype('int64')

        X_test_df = pd.DataFrame(
            data=X_test, columns=['out_degree', 'in_degree', 'avg_neig_deg'])
        X_test_df['Article'] = graph_features.test_ids
        X_test_df['Article'] = X_test_df['Article'].astype('int64')

        return X_train_df, X_test_df
 def _calc_betweenness(self):
     raw_ftr = GraphFeatures(self._graph,
                             {"betweenness": FeatureMeta(BetweennessCentralityCalculator, {"betweenness"})},
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature_dict = raw_ftr["betweenness"]._features
     feature_mx = np.zeros((len(feature_dict), 1))
     for i in feature_dict.keys():
         feature_mx[i] = feature_dict[i]
     return self._log_norm(feature_mx)
 def build_features(self):
     gnx_ftr = GraphFeatures(self._gnx,
                             CHOSEN_FEATURES,
                             dir_path=os.path.join(self._data_dir,
                                                   "features"),
                             logger=self._logger)
     gnx_ftr.build(should_dump=True)  # build ALL_FEATURES
     self._features_mx = gnx_ftr.to_matrix(dtype=np.float32,
                                           mtype=np.matrix)
     print(self._features_mx.shape)
 def build_features_problem_ab(self, force_rebuild=False, largest_cc=False):
     if len(self._features_matrix_dict) != 0 and not force_rebuild:
         return
     gnx_name = '20-Apr-2001'
     self._logger.debug("calculating features for " + gnx_name)
     gnx_path = os.path.join(self._pkl_dir, gnx_name)
     if gnx_name not in os.listdir(self._pkl_dir):
         os.mkdir(gnx_path)
     gnx = self.subgraph_by_name(gnx_name)
     gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path, logger=self._logger, is_max_connected=largest_cc)
     gnx_ftr.build(should_dump=True)  # build ALL_FEATURES
     self._features_matrix_dict[gnx_name] = gnx_ftr.to_matrix(dtype=np.float32, mtype=np.matrix)
 def build_features(self, largest_cc=False, should_zscore=True):
     for community in self._changed_communities:
         self._logger.debug("calculating features for " + community)
         gnx_path = os.path.join(self._pkl_dir, community)
         if community not in os.listdir(self._pkl_dir):
             os.mkdir(gnx_path)
         gnx = self.subgraph_by_name(community)
         gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path, logger=self._logger,
                                 is_max_connected=largest_cc)
         gnx_ftr.build(should_dump=False, force_build=True)  # build ALL_FEATURES
         self._features_matrix_dict[community] = (gnx, gnx_ftr)
     self._changed_communities = []
 def _calc_bfs(self):
     raw_ftr = GraphFeatures(
         self._graph,
         {"bfs_moments": FeatureMeta(BfsMomentsCalculator, {"bfs"})},
         dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature_dict = raw_ftr["bfs_moments"]._features
     feature_mx = np.zeros(
         (len(feature_dict), len(list(feature_dict.values())[0][0])))
     for i in feature_dict.keys():
         for j in range(len(feature_dict[i][0])):
             feature_mx[i, j] = feature_dict[i][0][j]
     return self._log_norm(feature_mx)
 def _calc_motif3(self):
     raw_ftr = GraphFeatures(self._graph, {
         "motif3":
         FeatureMeta(nth_nodes_motif(3, gpu=self._gpu, device=self._device),
                     {"m3"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=self._dump)
     feature = raw_ftr['motif3']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     return self._log_norm(motif_matrix)
 def build_features(self, pick_ftr=False, force_rebuild=False, largest_cc=False, should_zscore=True):
     if len(self._features_matrix_dict) != 0 and not force_rebuild and not pick_ftr:
         return
     for gnx_name in self._list_id:
         self._logger.debug("calculating features for " + gnx_name)
         gnx_path = os.path.join(self._pkl_dir, gnx_name)
         if gnx_name not in os.listdir(self._pkl_dir):
             os.mkdir(gnx_path)
         gnx = self.subgraph_by_name(gnx_name)
         gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path, logger=self._logger,
                                 is_max_connected=largest_cc)
         gnx_ftr.build(should_dump=True, force_build=force_rebuild)  # build ALL_FEATURES
         self._features_matrix_dict[gnx_name] = gnx_ftr.to_matrix(dtype=np.float32, mtype=np.matrix,
                                                                  should_zscore=should_zscore)
 def _calc_motif4(self):
     # FOR NOW, NO GPU FOR US
     if os.path.exists(os.path.join(self._dir_path, "motif4.pkl")):
         pkl4 = pickle.load(
             open(os.path.join(self._dir_path, "motif4.pkl"), "rb"))
         if type(pkl4) == dict:
             motif4 = self._to_matrix(pkl4)
         elif type(pkl4) == MotifsNodeCalculator:
             motif4 = np.array(pkl4._features)
         else:
             motif4 = np.array(pkl4)
         if self._motif_choice == "All_Motifs":
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             motif3_count = 1 + mp.get_3_clique_motifs(3)[
                 -1]  # The full 3 clique is the last motif 3.
             clique_motifs = [
                 m - motif3_count for m in mp.get_3_clique_motifs(4)
             ]
             return motif4[:, clique_motifs]
         else:
             return motif4
     raw_ftr = GraphFeatures(self._graph, {
         "motif4":
         FeatureMeta(nth_nodes_motif(4, gpu=self._gpu, device=self._device),
                     {"m4"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif4']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'],
                               self._params['probability'],
                               self._params['clique_size'],
                               self._params['directed'])
         motif3_count = 1 + mp.get_3_clique_motifs(3)[
             -1]  # The full 3 clique is the last motif 3.
         clique_motifs = [
             m - motif3_count for m in mp.get_3_clique_motifs(4)
         ]
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
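# Worked illustration of the index shift above (the concrete numbers are assumptions, not
# values taken from MotifProbability): get_3_clique_motifs(4) is treated as returning motif
# indices in the joint 3-node + 4-node enumeration, so they are shifted down by the number
# of 3-node motifs before indexing the motif-4 matrix alone. For example, if the last
# 3-node clique motif had index 12, then motif3_count = 13 and a joint index 29 would map
# to column 29 - 13 = 16 of the motif-4 matrix.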
 def _calc_bfs(self):
     raw_ftr = GraphFeatures(
         self._graph,
         {"bfs_moments": FeatureMeta(BfsMomentsCalculator, {"bfs"})},
         dir_path="")
     raw_ftr.build(should_dump=False)
     feat = raw_ftr["bfs_moments"]._features
     if type(feat) == list:
         feature_mx = np.array(feat)
     else:
         feature_mx = np.zeros((len(feat), len(list(feat.values())[0][0])))
         for i in feat.keys():
             for j in range(len(feat[i][0])):
                 feature_mx[i, j] = feat[i][0][j]
     return self._log_norm(feature_mx)
 def _calc_motif4(self):
     raw_ftr = GraphFeatures(self._graph, {
         "motif4":
         FeatureMeta(nth_nodes_motif(4, gpu=self._gpu, device=self._device),
                     {"m4"})
     },
                             dir_path="")
     raw_ftr.build(should_dump=False)
     feature = raw_ftr['motif4']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     return normed_matrix
 def _calc_motif3(self):
     raw_ftr = GraphFeatures(self._graph,
                             {"motif3": FeatureMeta(nth_nodes_motif(3, gpu=self._gpu, device=self._device), {"m3"})},
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif3']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'], self._params['probability'],
                               self._params['subgraph_size'], self._params['directed'])
         clique_motifs = mp.get_3_clique_motifs(3)
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
    def _execute_for_4(self, motifs_picked):
        if self._params["load_motifs"] or os.path.exists(
                os.path.join(self._dir_path, 'motif4.pkl')):
            pkl3 = pickle.load(
                open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
            pkl4 = pickle.load(
                open(os.path.join(self._dir_path, "motif4.pkl"), "rb"))
            try:
                m3 = pkl3._features
                if type(m3) == dict:
                    motif3 = self._to_matrix_(m3)
                else:
                    motif3 = np.array(m3)
            except AttributeError:
                if type(pkl3) == dict:
                    motif3 = self._to_matrix(pkl3)
                else:
                    motif3 = np.array(pkl3)
            try:
                m4 = pkl4._features
                if type(m4) == dict:
                    motif4 = self._to_matrix_(m4)
                else:
                    motif4 = np.array(m4)
            except AttributeError:
                if type(pkl4) == dict:
                    motif4 = self._to_matrix(pkl4)
                else:
                    motif4 = np.array(pkl4)
            self._motif_mat = np.hstack((motif3, motif4))
            if motifs_picked is not None:
                self._motif_mat = self._motif_mat[:, motifs_picked]
            print(str(datetime.datetime.now()) + " , Calculated motifs")
            return
        g_ftrs = GraphFeatures(self._graph,
                               self._motif_features,
                               dir_path=self._dir_path)
        g_ftrs.build(should_dump=True)
        print(str(datetime.datetime.now()) + " , Calculated motifs")
        self._motif_mat = np.hstack((np.asarray(g_ftrs['motif3']._features),
                                     np.asarray(g_ftrs['motif4']._features)))
        if motifs_picked is not None:
            self._motif_mat = self._motif_mat[:, motifs_picked]
 def _calc_motif4(self):
     raw_ftr = GraphFeatures(self._graph,
                             {"motif4": FeatureMeta(nth_nodes_motif(4, gpu=self._gpu, device=self._device), {"m4"})},
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif4']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'], self._params['probability'],
                               self._params['subgraph_size'], self._params['directed'])
         motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]  # The full 3 clique is the last motif 3.
         clique_motifs = [m - motif3_count for m in mp.get_3_clique_motifs(4)]
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
 def _calc_motif3(self):
     # FOR NOW, NO GPU FOR US
     if os.path.exists(os.path.join(self._dir_path, "motif3.pkl")):
         pkl3 = pickle.load(
             open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
         if type(pkl3) == dict:
             motif3 = self._to_matrix(pkl3)
         elif type(pkl3) == MotifsNodeCalculator:
             motif3 = np.array(pkl3._features)
         else:
             motif3 = np.array(pkl3)
         if self._motif_choice == "All_Motifs":
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             clique_motifs = mp.get_3_clique_motifs(3)
             return motif3[:, clique_motifs]
         else:
             return motif3
     raw_ftr = GraphFeatures(self._graph, {
         "motif3":
         FeatureMeta(nth_nodes_motif(3, gpu=self._gpu, device=self._device),
                     {"m3"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif3']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'],
                               self._params['probability'],
                               self._params['clique_size'],
                               self._params['directed'])
         clique_motifs = mp.get_3_clique_motifs(3)
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
 def calculate_features(self, dumping_specs=None):
     """
      :param dumping_specs: A dictionary specifying how to dump the non-special features.
                           The default is saving the class only (as a pickle file).
                           'object': What to save - either 'class' (save the calculator with the features inside),
                                     'feature' (the feature itself only, saved as name + '_ftr') or 'both'.
                                      Note that if only the feature is saved, then when the calculator is called again,
                                      the class will not load the feature but will calculate it again.
                           'file_type': If the feature itself is saved, one can choose between two formats:
                                        either 'pkl' (save the feature as a pickle file, as is) or 'csv' (save a
                                        csv file of the feature values).
                           'vertex_names': If the features are saved as a csv file, there is an option of saving
                                           the name of each vertex in each row, before the feature values.
                                           The value here is a boolean indicating whether to put the original names
                                            of the vertices at the beginning of each row.
     """
     if not len(self._features) + len(
             self._special_features) and self._verbose:
         print("No features were chosen!")
     else:
         self._adj_matrix = nx.adjacency_matrix(self._graph)
         # self._adj_matrix = self._adj_matrix.toarray()
         self._raw_features = GraphFeatures(gnx=self._graph,
                                            features=self._features,
                                            dir_path=self._dir_path,
                                            logger=self._logger)
         if dumping_specs is not None:
             if 'vertex_names' in dumping_specs:
                 if dumping_specs['vertex_names']:
                     dumping_specs['vertex_names'] = self._mapping
                 else:
                     del dumping_specs['vertex_names']
         self._raw_features.build(should_dump=True,
                                  dumping_specs=dumping_specs)
         self._other_features = OtherFeatures(self._graph,
                                              self._special_features,
                                              self._dir_path, self._params,
                                              self._logger)
         self._other_features.build(should_dump=True)
         self._logger.info(
             str(datetime.datetime.now()) + " , Calculated features")
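# A hedged usage sketch of the dumping_specs dictionary described in the docstring above;
# the keys and values follow that docstring, while the commented call site is only an
# illustration.
dumping_specs = {
    "object": "both",      # dump both the calculator class and the raw feature
    "file_type": "csv",    # when the feature itself is dumped, write it as a csv file
    "vertex_names": True,  # prepend the original vertex names to each csv row
}
# calculate_features(dumping_specs=dumping_specs)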
 def calculate_features(self):
     if not len(self._features) + len(
             self._special_features) and self._verbose:
         print("No features were chosen!")
     else:
         self._adj_matrix = nx.adjacency_matrix(self._graph)
         # self._adj_matrix = self._adj_matrix.toarray()
         self._raw_features = GraphFeatures(gnx=self._graph,
                                            features=self._features,
                                            dir_path=self._dir_path,
                                            logger=self._logger)
         self._raw_features.build(
             should_dump=True
         )  # The option of multiple workers in this function exists.
         self._other_features = OtherFeatures(self._graph,
                                              self._special_features,
                                              self._dir_path, self._params,
                                              self._logger)
         self._other_features.build(should_dump=True)
         self._logger.info(
             str(datetime.datetime.now()) + " , Calculated features")
    def _calc_matrix(self):
        database_name = self._data_name + "_" + str(
            self._params.max_connected) + "_" + str(self._params.directed)
        mat_pkl_path = os.path.join(self._base_dir, "pkl", "vectors",
                                    database_name + "_matrix.pkl")
        if os.path.exists(mat_pkl_path):
            self._logger.info("loading pkl file - graph_matrix")
            return pickle.load(open(mat_pkl_path, "rb"))

        gnx_to_vec = {}
        # create dir for database
        pkl_dir = os.path.join(self._base_dir, "pkl", "features")
        database_pkl_dir = os.path.join(pkl_dir, database_name)
        if database_name not in os.listdir(pkl_dir):
            os.mkdir(database_pkl_dir)

        for gnx_name, gnx in zip(self._temporal_graph.graph_names(),
                                 self._temporal_graph.graphs()):
            # create dir for specific graph features
            gnx_path = os.path.join(database_pkl_dir, gnx_name)
            if gnx_name not in os.listdir(database_pkl_dir):
                os.mkdir(gnx_path)

            gnx_ftr = GraphFeatures(
                gnx,
                self._params.features,
                dir_path=gnx_path,
                logger=self._logger,
                is_max_connected=self._params.max_connected)
            gnx_ftr.build(should_dump=True,
                          force_build=self._params.FORCE_REBUILD_FEATURES
                          )  # build features
            # calc motif ratio vector
            gnx_to_vec[gnx_name] = FeaturesProcessor(gnx_ftr).as_matrix()

        pickle.dump(gnx_to_vec, open(mat_pkl_path, "wb"))
        return gnx_to_vec
 def _calculate_motif_matrix(self):
     if self._params["load_motifs"] or os.path.exists(
             os.path.join(self._dir_path, 'motif4.pkl')):
         pkl3 = pickle.load(
             open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
         pkl4 = pickle.load(
             open(os.path.join(self._dir_path, "motif4.pkl"), "rb"))
         if type(pkl3) == dict:
             motif3 = self._to_matrix(pkl3)
         elif type(pkl3) == MotifsNodeCalculator:
             pkl3 = pkl3._features
             if type(pkl3) == list:
                 motif3 = np.array(pkl3)
             else:
                 motif3 = self._to_matrix_(pkl3)
         else:
             motif3 = np.array(pkl3)
         if type(pkl4) == dict:
             motif4 = self._to_matrix(pkl4)
         elif type(pkl4) == MotifsNodeCalculator:
             pkl4 = pkl4._features
             if type(pkl4) == list:
                 motif4 = np.array(pkl4)
             else:
                 motif4 = self._to_matrix_(pkl4)
         else:
             motif4 = np.array(pkl4)
         self._motif_mat = np.hstack((motif3, motif4))
         print(str(datetime.datetime.now()) + " , Calculated motifs")
         return
     g_ftrs = GraphFeatures(self._graph,
                            self._motif_features,
                            dir_path=self._dir_path)
     g_ftrs.build(should_dump=True)
     print(str(datetime.datetime.now()) + " , Calculated motifs")
     self._motif_mat = np.hstack((np.asarray(g_ftrs['motif3']._features),
                                  np.asarray(g_ftrs['motif4']._features)))
        if to_add:
            to_add = self._convert_dict_to_list(to_add, entries_order)
            as_matrix = np.hstack((as_matrix, np.matrix(to_add)))
        if norm_func:
            as_matrix = norm_func(as_matrix)
        return as_matrix


if __name__ == "__main__":
    import networkx as nx

    gnx = nx.Graph()
    gnx.add_edges_from([
        (1, 2),
        (1, 3),
        (2, 3),
        (2, 7),
        (7, 8),
        (3, 6),
        (4, 6),
        (6, 8),
        (5, 6),
    ])
    gnx_ftr = GraphFeatures(gnx,
                            FeaturesMeta().NODE_LEVEL,
                            ".",
                            is_max_connected=True)
    fp = FeaturesProcessor(gnx_ftr)
    fp.activate_motif_ratio_vec()
    e = 0
        header = []
        for i in range(num_motifs):
            header.append((motif_type, i))
        return header

    @staticmethod
    def is_motif(ftr):
        return ftr == 'motif4' or ftr == "motif3"


if __name__ == "__main__":
    import networkx as nx
    from feature_meta import NODE_FEATURES

    gnx = nx.Graph()
    gnx.add_edges_from([
        (1, 2),
        (1, 3),
        (2, 3),
        (2, 7),
        (7, 8),
        (3, 6),
        (4, 6),
        (6, 8),
        (5, 6),
    ])
    gnx_ftr = GraphFeatures(gnx, NODE_FEATURES, ".", is_max_connected=True)
    gnx_ftr.build()
    m = MotifRatio(gnx_ftr, False)
    e = 0
    def _calc_features(self, pkl=True):
        # load dictionary if exists
        if pkl and self._ftr_pkl_name() in os.listdir(
                os.path.join(self._base_dir, 'pkl',
                             'ftr_by_time_dictionaries')):
            self._features_by_time, self._multi_graphs_by_time = \
                pickle.load(open(os.path.join(self._base_dir, 'pkl', 'ftr_by_time_dictionaries',
                                              self._ftr_pkl_name()), "rb"))
            return

        self._load_database()
        labels = self._database.labels
        # make directory for database
        dir_path = os.path.join(self._base_dir, 'pkl', 'graph_measures')
        if self._params['database_full_name'] not in os.listdir(dir_path):
            os.mkdir(os.path.join(dir_path,
                                  self._params['database_full_name']))
        dir_path = os.path.join(dir_path, self._params['database_full_name'])

        # calculate features
        for i, multi_graph in enumerate(
                self._database.multi_graph_by_window(
                    self._params['window_size'], self._params['start_time'])):
            if "time_" + str(i) not in os.listdir(dir_path):
                os.mkdir(os.path.join(dir_path, "time_" + str(i)))
            mg_dir_path = os.path.join(dir_path, "time_" + str(i))

            ftr_tmp_dict = {}
            # nodes_and_edges = {}
            for name in multi_graph.graph_names():
                if name not in os.listdir(mg_dir_path):
                    os.mkdir(os.path.join(mg_dir_path, name))
                gnx_dir_path = os.path.join(mg_dir_path, name)

                raw_ftr = GraphFeatures(
                    multi_graph.get_gnx(name),
                    NODE_FEATURES_ML,
                    dir_path=gnx_dir_path,
                    is_max_connected=self._params['max_connected'],
                    logger=PrintLogger(self._params['database_full_name']))
                raw_ftr.build(should_dump=True)  # build features
                nodes_and_edges = [
                    np.log(1 + multi_graph.node_count(graph_id=name)),
                    np.log(1 + multi_graph.edge_count(graph_id=name))
                ]
                # nodes_and_edges = [multi_graph.node_count(graph_id=name), multi_graph.edge_count(graph_id=name)]
                # nodes_and_edges[name] = [multi_graph.node_count(graph_id=name), multi_graph.edge_count(graph_id=name)]

                # ====================== motif ratio ========================
                ftr_tmp_dict[name] = (
                    FeaturesProcessor(raw_ftr).activate_motif_ratio_vec(
                        to_add=nodes_and_edges), labels[name])

                # ==================== ftr correlation ======================
                # ftr_tmp_dict[name] = (FeaturesProcessor(raw_ftr).as_matrix(norm_func=log_norm))
                # ftr_tmp_dict[name] = (FeaturesProcessor(raw_ftr).as_matrix())

            # concat_mx = np.vstack([mx for name, mx in ftr_tmp_dict.items()])
            # pearson_picker = PearsonFeaturePicker(concat_mx, size=self._params['ftr_pairs'],
            #                                       identical_bar=0.9)
            # best_pairs = pearson_picker.best_pairs()
            # beta = LinearContext(multi_graph, ftr_tmp_dict, best_pairs, window_size=len(ftr_tmp_dict))
            # beta_matrix = beta.beta_matrix()
            # nodes and edges can be appended here
            # for j, name in enumerate(multi_graph.graph_names()):
            #     ftr_tmp_dict[name] = (np.hstack((beta_matrix[j], nodes_and_edges[name])), labels[name])

            self._features_by_time.append(ftr_tmp_dict)

            multi_graph.suspend_logger()
            self._multi_graphs_by_time.append(multi_graph)

        pickle.dump((self._features_by_time, self._multi_graphs_by_time),
                    open(
                        os.path.join(self._base_dir, 'pkl',
                                     'ftr_by_time_dictionaries',
                                     self._ftr_pkl_name()), "wb"))