def _calc_vec(self):
    """Return a ``{graph name: motif-ratio vector}`` mapping, cached on disk.

    The mapping is pickled under ``<base_dir>/pkl/vectors``.  When that
    pickle already exists it is loaded and returned without building
    anything.  Otherwise, for every graph of ``self._temporal_graph`` the
    features are built (and dumped) under
    ``<base_dir>/pkl/features/<database_name>/<graph name>`` and the motif
    ratio vector is computed, using ``log_norm`` when ``self._params.log``
    is truthy.

    :return: dict mapping each graph name to the value returned by
        ``FeaturesProcessor.activate_motif_ratio_vec``.
    """
    params = self._params
    database_name = "_".join([
        params.database.DATABASE_NAME,
        str(params.max_connected),
        str(params.directed),
    ])
    vec_pkl_path = os.path.join(
        self._base_dir, "pkl", "vectors",
        database_name + "_vectors_log_" + str(params.log) + ".pkl")

    if os.path.exists(vec_pkl_path):
        self._logger.info("loading pkl file - graph_vectors")
        # The cache is written by this method itself; do not point it at
        # pickles coming from an untrusted source.
        with open(vec_pkl_path, "rb") as pkl_file:
            return pickle.load(pkl_file)

    # create dir for database (no-op when it already exists)
    database_pkl_dir = os.path.join(self._base_dir, "pkl", "features",
                                    database_name)
    os.makedirs(database_pkl_dir, exist_ok=True)

    gnx_to_vec = {}
    for gnx_name, gnx in zip(self._temporal_graph.graph_names(),
                             self._temporal_graph.graphs()):
        # create dir for specific graph features
        gnx_path = os.path.join(database_pkl_dir, gnx_name)
        os.makedirs(gnx_path, exist_ok=True)

        gnx_ftr = GraphFeatures(gnx, params.features, dir_path=gnx_path,
                                logger=self._logger,
                                is_max_connected=params.max_connected)
        # build features
        gnx_ftr.build(should_dump=True,
                      force_build=params.FORCE_REBUILD_FEATURES)

        # calc motif ratio vector
        processor = FeaturesProcessor(gnx_ftr)
        if params.log:
            gnx_to_vec[gnx_name] = processor.activate_motif_ratio_vec(
                norm_func=log_norm)
        else:
            gnx_to_vec[gnx_name] = processor.activate_motif_ratio_vec()

    # Make sure the cache directory exists before writing into it.
    os.makedirs(os.path.dirname(vec_pkl_path), exist_ok=True)
    with open(vec_pkl_path, "wb") as pkl_file:
        pickle.dump(gnx_to_vec, pkl_file)
    return gnx_to_vec
def _execute_for_3(self, motifs_picked):
    """Store the 3-motif matrix, restricted to chosen columns, in ``self._motif_mat``.

    When ``self._params["load_motifs"]`` is truthy or ``motif3.pkl`` exists
    in ``self._dir_path``, the matrix is read from that pickle; otherwise
    the "motif3" feature is built (and dumped) with ``GraphFeatures``.

    :param motifs_picked: column selector applied as
        ``matrix[:, motifs_picked]``; ``None`` keeps every column (same
        convention as ``_execute_for_4``).
    :return: None.
    """
    pkl_path = os.path.join(self._dir_path, "motif3.pkl")
    if self._params["load_motifs"] or os.path.exists(pkl_path):
        with open(pkl_path, "rb") as pkl_file:
            pkl3 = pickle.load(pkl_file)
        # Only the attribute lookup is guarded, so an AttributeError raised
        # while converting the features is no longer silently swallowed.
        try:
            m3 = pkl3._features
        except AttributeError:
            # The pickle holds the raw features rather than a calculator.
            if isinstance(pkl3, dict):
                motif3 = self._to_matrix(pkl3)
            else:
                motif3 = np.array(pkl3)
        else:
            # NOTE(review): this branch calls `_to_matrix_` while the one
            # above calls `_to_matrix`; kept as in the original - confirm
            # both helpers exist on the class.
            if isinstance(m3, dict):
                motif3 = self._to_matrix_(m3)
            else:
                motif3 = np.array(m3)
        self._motif_mat = motif3
        if motifs_picked is not None:
            self._motif_mat = self._motif_mat[:, motifs_picked]
        print(str(datetime.datetime.now()) + " , Calculated motifs")
        return

    motif_features = {"motif3": self._motif_features["motif3"]}
    g_ftrs = GraphFeatures(self._graph, motif_features,
                           dir_path=self._dir_path)
    g_ftrs.build(should_dump=True)
    print(str(datetime.datetime.now()) + " , Calculated motifs")
    self._motif_mat = np.asarray(g_ftrs['motif3']._features)
    if motifs_picked is not None:
        self._motif_mat = self._motif_mat[:, motifs_picked]
def _set_index_to_ftr(self):
    """Return the ordered list of ``(feature name, index)`` pairs.

    The list is computed once from the first graph of
    ``self._temporal_graph`` and kept in ``self._index_ftr``; later calls
    return the stored list without building features again.

    Relevant features (``is_relevant()``) are taken in sorted name order.
    'motif3' / 'motif4' entries come from ``self._get_motif_type``; every
    other feature contributes ``(name, 0) ... (name, len - 1)``.

    :return: ``self._index_ftr``.
    """
    # Already computed: skip the feature build, whose result would be unused.
    if self._index_ftr:
        return self._index_ftr

    gnx_name = next(self._temporal_graph.graph_names())
    gnx = next(self._temporal_graph.graphs())
    database_name = "_".join([
        self._data_name,
        str(self._params.max_connected),
        str(self._params.directed),
    ])
    gnx_path = os.path.join(self._base_dir, "pkl", "features",
                            database_name, gnx_name)
    gnx_ftr = GraphFeatures(gnx, self._params.features, dir_path=gnx_path,
                            logger=self._logger,
                            is_max_connected=self._params.max_connected)
    # build features
    gnx_ftr.build(should_dump=False,
                  force_build=self._params.FORCE_REBUILD_FEATURES)

    # fix feature order (names)
    sorted_ftr = [f for f in sorted(gnx_ftr) if gnx_ftr[f].is_relevant()]

    # Build into a local list so a failure half-way cannot leave a partial
    # index behind in self._index_ftr.
    index_ftr = []
    for ftr in sorted_ftr:
        len_ftr = len(gnx_ftr[ftr])
        # fill list with (ftr, counter)
        if ftr in ('motif3', 'motif4'):
            index_ftr += self._get_motif_type(ftr, len_ftr)
        else:
            index_ftr += [(ftr, i) for i in range(len_ftr)]
    self._index_ftr = index_ftr
    return self._index_ftr
def _gnx_vec(self, gnx_id, gnx: nx.Graph, node_order):
    """Assemble the per-node feature matrix of one graph.

    Blocks are appended in this order, each only when its flag is set, and
    finally stacked side by side with ``np.hstack``:

    * ``log(degree + 1e-3)``      if ``self._deg``
    * ``log(in_degree + 1e-3)``   if ``self._in_deg``
    * ``log(out_degree + 1e-3)``  if ``self._out_deg``
    * external value features     if ``self._is_external_data`` and
      ``self._external_data.is_value``
    * log-normalised graph features (built and dumped under
      ``<self._ftr_path>/<gnx_id>``) if ``self._is_ftr``

    :param gnx_id: graph identifier; used for the external-data lookup and
        as the name of the feature dump directory.
    :param gnx: the graph itself.
    :param node_order: iterable of nodes fixing the row order.
    :return: the horizontally stacked matrix.
    :raises ValueError: from ``np.hstack`` when no block is enabled.
    """
    def _log_degree_column(degree_view):
        # One column, rows ordered like node_order; 1e-3 avoids log(0).
        return np.matrix(
            [np.log(degree_view[node] + 1e-3) for node in node_order]).T

    final_vec = []

    # The degree accessor is resolved lazily by name so that e.g.
    # `in_degree` is only touched when its flag is set.
    degree_blocks = (
        (self._deg, "degree"),
        (self._in_deg, "in_degree"),
        (self._out_deg, "out_degree"),
    )
    for enabled, accessor_name in degree_blocks:
        if enabled:
            degrees = getattr(gnx, accessor_name)(gnx.nodes)
            final_vec.append(_log_degree_column(degrees))

    if self._is_external_data and self._external_data.is_value:
        # NOTE(review): unlike the degree columns this block is not
        # transposed - presumably value_feature returns one vector per
        # node; confirm against the external-data class.
        final_vec.append(
            np.matrix([
                self._external_data.value_feature(gnx_id, d)
                for d in node_order
            ]))

    if self._is_ftr:
        gnx_dir_path = os.path.join(self._ftr_path, str(gnx_id))
        # exist_ok avoids the check-then-create race and also creates a
        # missing parent directory.
        os.makedirs(gnx_dir_path, exist_ok=True)
        raw_ftr = GraphFeatures(gnx, self._ftr_meta, dir_path=gnx_dir_path,
                                is_max_connected=False,
                                logger=PrintLogger("logger"))
        raw_ftr.build(should_dump=True)  # build features
        final_vec.append(
            FeaturesProcessor(raw_ftr).as_matrix(norm_func=log_norm))

    return np.hstack(final_vec)
def _calc_motif3(self, gpu, device):
    """Return the 3-motif counts as a ``{vertex: counts}`` dictionary.

    When ``self._dir_path`` is non-empty and contains ``motif3.pkl`` the
    cached value is returned: a pickled dict as is, a pickled list
    re-keyed by position, and any other object through its ``_features``
    attribute re-keyed by position.

    Otherwise the feature is computed with ``GraphFeatures``.  The graph is
    used unchanged when its largest node label equals ``len(graph) - 1``;
    in every other case ``self._relabel_graph()`` supplies the graph and
    the vertex dictionary used to key the result.  The feature is dumped
    only when ``self._dir_path`` is non-empty.

    :param gpu: forwarded to ``nth_nodes_motif``.
    :param device: forwarded to ``nth_nodes_motif``.
    :return: dict of motif counts per vertex.
    """
    use_disk = self._dir_path != ""

    if use_disk:
        pkl_path = os.path.join(self._dir_path, "motif3.pkl")
        if os.path.exists(pkl_path):
            with open(pkl_path, "rb") as pkl_file:
                pkl3 = pickle.load(pkl_file)
            if isinstance(pkl3, dict):
                return pkl3
            if isinstance(pkl3, list):
                return dict(enumerate(pkl3))
            motif3 = pkl3._features
            return {v: motif3[v] for v in range(len(motif3))}

    # Equivalent to the original `not max_label != n - 1` test.
    if max(self._graph.nodes()) == len(self._graph) - 1:
        graph = self._graph
        vertices_dict = {v: v for v in self._graph.nodes()}
    else:
        graph, vertices_dict = self._relabel_graph()

    raw_ftr = GraphFeatures(graph, {
        "motif3": FeatureMeta(nth_nodes_motif(3, gpu=gpu, device=device),
                              {"m3"})
    }, dir_path=self._dir_path)
    raw_ftr.build(should_dump=use_disk)
    motif3 = raw_ftr['motif3']._features
    return {vertices_dict[v]: motif3[v] for v in range(len(vertices_dict))}
def _calc_betweenness(self):
    """Return the "betweenness" feature as a normalised column.

    The feature is built with ``GraphFeatures`` (and dumped to
    ``self._dir_path``), copied into an ``(n, 1)`` array whose row ``i``
    holds the value stored under key ``i``, and passed through
    ``self._log_norm``.

    :return: the value returned by ``self._log_norm``.
    """
    meta = {
        "betweenness": FeatureMeta(BetweennessCentralityCalculator,
                                   {"betweenness"}),
    }
    graph_features = GraphFeatures(self._graph, meta,
                                   dir_path=self._dir_path)
    graph_features.build(should_dump=True)

    centrality = graph_features["betweenness"]._features
    column = np.zeros((len(centrality), 1))
    for node, value in centrality.items():
        column[node] = value
    return self._log_norm(column)
def build_features(self):
    """Build CHOSEN_FEATURES for ``self._gnx`` and keep them as a matrix.

    Features are built and dumped under ``<self._data_dir>/features``; the
    resulting ``np.float32`` ``np.matrix`` is stored in
    ``self._features_mx`` and its shape is printed.
    """
    features_dir = os.path.join(self._data_dir, "features")
    gnx_ftr = GraphFeatures(self._gnx, CHOSEN_FEATURES,
                            dir_path=features_dir, logger=self._logger)
    # build ALL_FEATURES
    gnx_ftr.build(should_dump=True)
    self._features_mx = gnx_ftr.to_matrix(dtype=np.float32,
                                          mtype=np.matrix)
    print(self._features_mx.shape)
def build_features_problem_ab(self, force_rebuild=False, largest_cc=False,
                              gnx_name='20-Apr-2001'):
    """Build the feature matrix of a single named sub-graph.

    Nothing is done when ``self._features_matrix_dict`` already has
    entries, unless ``force_rebuild`` is set.  Otherwise the features of
    the sub-graph are built and dumped under ``<self._pkl_dir>/<gnx_name>``
    and stored as an ``np.float32`` ``np.matrix`` in
    ``self._features_matrix_dict[gnx_name]``.

    :param force_rebuild: run even when matrices were already computed.
    :param largest_cc: forwarded to ``GraphFeatures`` as
        ``is_max_connected``.
    :param gnx_name: name of the sub-graph to process; defaults to the
        snapshot that used to be hard-coded.
    :return: None.
    """
    if self._features_matrix_dict and not force_rebuild:
        return

    self._logger.debug("calculating features for " + gnx_name)
    gnx_path = os.path.join(self._pkl_dir, gnx_name)
    # exist_ok replaces the racy listdir()/mkdir() pair.
    os.makedirs(gnx_path, exist_ok=True)

    gnx = self.subgraph_by_name(gnx_name)
    gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path,
                            logger=self._logger,
                            is_max_connected=largest_cc)
    gnx_ftr.build(should_dump=True)  # build ALL_FEATURES
    self._features_matrix_dict[gnx_name] = gnx_ftr.to_matrix(
        dtype=np.float32, mtype=np.matrix)
def build_features(self, largest_cc=False, should_zscore=True):
    """Rebuild the features of every community marked as changed.

    For each name in ``self._changed_communities`` the sub-graph is
    fetched, its features are force-built without dumping, and the pair
    ``(graph, GraphFeatures)`` is stored in
    ``self._features_matrix_dict[name]``.  The changed list is emptied
    afterwards.

    :param largest_cc: forwarded to ``GraphFeatures`` as
        ``is_max_connected``.
    :param should_zscore: accepted for interface compatibility; not used
        by this method.
    :return: None.
    """
    for community in self._changed_communities:
        self._logger.debug("calculating features for " + community)
        gnx_path = os.path.join(self._pkl_dir, community)
        # exist_ok replaces the racy listdir()/mkdir() pair.
        os.makedirs(gnx_path, exist_ok=True)

        gnx = self.subgraph_by_name(community)
        gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path,
                                logger=self._logger,
                                is_max_connected=largest_cc)
        # build ALL_FEATURES
        gnx_ftr.build(should_dump=False, force_build=True)
        self._features_matrix_dict[community] = (gnx, gnx_ftr)
    self._changed_communities = []
def _calc_bfs(self):
    """Return the "bfs_moments" feature as a normalised matrix.

    The feature is built with ``GraphFeatures`` (and dumped to
    ``self._dir_path``).  Row ``i`` of the matrix is filled from the first
    element of the entry stored under key ``i``; the column count is the
    length of the first entry's first element.

    :return: the value returned by ``self._log_norm``.
    """
    meta = {"bfs_moments": FeatureMeta(BfsMomentsCalculator, {"bfs"})}
    graph_features = GraphFeatures(self._graph, meta,
                                   dir_path=self._dir_path)
    graph_features.build(should_dump=True)

    moments_by_node = graph_features["bfs_moments"]._features
    n_rows = len(moments_by_node)
    n_cols = len(list(moments_by_node.values())[0][0])
    matrix = np.zeros((n_rows, n_cols))
    for node, entry in moments_by_node.items():
        for col, value in enumerate(entry[0]):
            matrix[node, col] = value
    return self._log_norm(matrix)
def _calc_motif3(self):
    """Return the normalised 3-motif matrix of ``self._graph``.

    The "motif3" feature is built with ``GraphFeatures`` (dumped only when
    ``self._dump`` says so).  A dict result is converted with
    ``self._to_matrix``; anything else is used as is.

    :return: the value returned by ``self._log_norm``.
    """
    calculator = nth_nodes_motif(3, gpu=self._gpu, device=self._device)
    meta = {"motif3": FeatureMeta(calculator, {"m3"})}
    graph_features = GraphFeatures(self._graph, meta,
                                   dir_path=self._dir_path)
    graph_features.build(should_dump=self._dump)

    counts = graph_features['motif3']._features
    matrix = self._to_matrix(counts) if type(counts) is dict else counts
    return self._log_norm(matrix)
def build_features(self, pick_ftr=False, force_rebuild=False,
                   largest_cc=False, should_zscore=True):
    """Build the feature matrix of every graph id in ``self._list_id``.

    Nothing is done when matrices already exist, unless ``force_rebuild``
    or ``pick_ftr`` is set.  Features are built and dumped under
    ``<self._pkl_dir>/<graph name>`` and stored as ``np.float32``
    ``np.matrix`` objects in ``self._features_matrix_dict``.

    :param pick_ftr: when truthy, run even if matrices already exist.
    :param force_rebuild: when truthy, run even if matrices already exist
        and forward ``force_build=True`` to ``GraphFeatures.build``.
    :param largest_cc: forwarded to ``GraphFeatures`` as
        ``is_max_connected``.
    :param should_zscore: forwarded to ``GraphFeatures.to_matrix``.
    :return: None.
    """
    already_built = bool(self._features_matrix_dict)
    if already_built and not force_rebuild and not pick_ftr:
        return

    for gnx_name in self._list_id:
        self._logger.debug("calculating features for " + gnx_name)
        gnx_path = os.path.join(self._pkl_dir, gnx_name)
        # exist_ok replaces the racy listdir()/mkdir() pair.
        os.makedirs(gnx_path, exist_ok=True)

        gnx = self.subgraph_by_name(gnx_name)
        gnx_ftr = GraphFeatures(gnx, self._features_meta, dir_path=gnx_path,
                                logger=self._logger,
                                is_max_connected=largest_cc)
        # build ALL_FEATURES
        gnx_ftr.build(should_dump=True, force_build=force_rebuild)
        self._features_matrix_dict[gnx_name] = gnx_ftr.to_matrix(
            dtype=np.float32, mtype=np.matrix, should_zscore=should_zscore)
def _calc_motif4(self):
    """Return the 4-motif matrix, from ``motif4.pkl`` when it exists.

    If ``motif4.pkl`` is present in ``self._dir_path`` the pickled value is
    converted to a matrix (dict via ``self._to_matrix``, a
    ``MotifsNodeCalculator`` via its ``_features``, anything else via
    ``np.array``).  Otherwise the feature is built and dumped with
    ``GraphFeatures`` and passed through ``self._log_norm``.

    When ``self._motif_choice == "All_Motifs"`` only the columns returned
    by ``MotifProbability.get_3_clique_motifs(4)``, shifted by the number
    of 3-motifs, are kept.

    :return: the (possibly column-restricted) motif matrix.
    """
    def _pick_columns(matrix):
        # Shared by the cached and the freshly computed path.
        if self._motif_choice != "All_Motifs":
            return matrix
        mp = MotifProbability(self._params['vertices'],
                              self._params['probability'],
                              self._params['clique_size'],
                              self._params['directed'])
        # The full 3 clique is the last motif 3.
        motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
        clique_motifs = [
            m - motif3_count for m in mp.get_3_clique_motifs(4)
        ]
        return matrix[:, clique_motifs]

    # FOR NOW, NO GPU FOR US
    pkl_path = os.path.join(self._dir_path, "motif4.pkl")
    if os.path.exists(pkl_path):
        with open(pkl_path, "rb") as pkl_file:
            pkl4 = pickle.load(pkl_file)
        if isinstance(pkl4, dict):
            motif4 = self._to_matrix(pkl4)
        elif isinstance(pkl4, MotifsNodeCalculator):
            motif4 = np.array(pkl4._features)
        else:
            motif4 = np.array(pkl4)
        # NOTE(review): the cached matrix is returned without _log_norm,
        # whereas the freshly built one below is normalised - confirm this
        # asymmetry is intended.
        return _pick_columns(motif4)

    raw_ftr = GraphFeatures(self._graph, {
        "motif4": FeatureMeta(
            nth_nodes_motif(4, gpu=self._gpu, device=self._device), {"m4"})
    }, dir_path=self._dir_path)
    raw_ftr.build(should_dump=True)
    feature = raw_ftr['motif4']._features
    if isinstance(feature, dict):
        motif_matrix = self._to_matrix(feature)
    else:
        motif_matrix = feature
    return _pick_columns(self._log_norm(motif_matrix))
def _calc_motif4(self):
    """Return the normalised 4-motif matrix, computed in memory only.

    The "motif4" feature is built with ``GraphFeatures`` using an empty
    ``dir_path`` and ``should_dump=False``.  A dict result is converted
    with ``self._to_matrix``; anything else is used as is.

    :return: the value returned by ``self._log_norm``.
    """
    calculator = nth_nodes_motif(4, gpu=self._gpu, device=self._device)
    meta = {"motif4": FeatureMeta(calculator, {"m4"})}
    graph_features = GraphFeatures(self._graph, meta, dir_path="")
    graph_features.build(should_dump=False)

    counts = graph_features['motif4']._features
    matrix = self._to_matrix(counts) if type(counts) is dict else counts
    return self._log_norm(matrix)
def _calc_bfs(self):
    """Return the normalised "bfs_moments" matrix, computed in memory only.

    The feature is built with ``GraphFeatures`` using an empty
    ``dir_path`` and ``should_dump=False``.  A list result is wrapped with
    ``np.array``; otherwise row ``i`` is filled from the first element of
    the entry stored under key ``i``.

    :return: the value returned by ``self._log_norm``.
    """
    meta = {"bfs_moments": FeatureMeta(BfsMomentsCalculator, {"bfs"})}
    graph_features = GraphFeatures(self._graph, meta, dir_path="")
    graph_features.build(should_dump=False)

    moments = graph_features["bfs_moments"]._features
    if type(moments) is list:
        return self._log_norm(np.array(moments))

    n_cols = len(list(moments.values())[0][0])
    matrix = np.zeros((len(moments), n_cols))
    for node, entry in moments.items():
        for col, value in enumerate(entry[0]):
            matrix[node, col] = value
    return self._log_norm(matrix)
def _calc_motif3(self):
    """Return the normalised 3-motif matrix, optionally column-restricted.

    The "motif3" feature is built and dumped with ``GraphFeatures``,
    converted with ``self._to_matrix`` when it is a dict, and passed
    through ``self._log_norm``.  When ``self._motif_choice`` equals
    "All_Motifs" only the columns given by
    ``MotifProbability.get_3_clique_motifs(3)`` are returned.

    :return: the normalised matrix or the selected columns of it.
    """
    calculator = nth_nodes_motif(3, gpu=self._gpu, device=self._device)
    graph_features = GraphFeatures(
        self._graph, {"motif3": FeatureMeta(calculator, {"m3"})},
        dir_path=self._dir_path)
    graph_features.build(should_dump=True)

    counts = graph_features['motif3']._features
    matrix = self._to_matrix(counts) if type(counts) is dict else counts
    normalised = self._log_norm(matrix)

    if self._motif_choice != "All_Motifs":
        return normalised

    probability = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['subgraph_size'],
                                   self._params['directed'])
    columns = probability.get_3_clique_motifs(3)
    return normalised[:, columns]
def _execute_for_4(self, motifs_picked):
    """Store the stacked 3- and 4-motif matrix in ``self._motif_mat``.

    When ``self._params["load_motifs"]`` is truthy or ``motif4.pkl`` exists
    in ``self._dir_path``, both ``motif3.pkl`` and ``motif4.pkl`` are read
    from that directory; otherwise the features in
    ``self._motif_features`` are built (and dumped) with
    ``GraphFeatures``.  The two matrices are joined with ``np.hstack``.

    :param motifs_picked: column selector applied as
        ``matrix[:, motifs_picked]``; ``None`` keeps every column.
    :return: None.
    """
    def _load_matrix(file_name):
        # Read one motif pickle and convert its content to an array.
        with open(os.path.join(self._dir_path, file_name), "rb") as pkl_file:
            loaded = pickle.load(pkl_file)
        # Only the attribute lookup is guarded, so an AttributeError raised
        # while converting the features is no longer silently swallowed.
        try:
            features = loaded._features
        except AttributeError:
            # The pickle holds the raw features rather than a calculator.
            if isinstance(loaded, dict):
                return self._to_matrix(loaded)
            return np.array(loaded)
        # NOTE(review): this branch calls `_to_matrix_` while the one above
        # calls `_to_matrix`; kept as in the original - confirm both exist.
        if isinstance(features, dict):
            return self._to_matrix_(features)
        return np.array(features)

    # NOTE(review): only motif4.pkl is checked, yet motif3.pkl is read as
    # well - presumably both are always dumped together; confirm.
    if self._params["load_motifs"] or os.path.exists(
            os.path.join(self._dir_path, 'motif4.pkl')):
        motif3 = _load_matrix("motif3.pkl")
        motif4 = _load_matrix("motif4.pkl")
        self._motif_mat = np.hstack((motif3, motif4))
        if motifs_picked is not None:
            self._motif_mat = self._motif_mat[:, motifs_picked]
        print(str(datetime.datetime.now()) + " , Calculated motifs")
        return

    g_ftrs = GraphFeatures(self._graph, self._motif_features,
                           dir_path=self._dir_path)
    g_ftrs.build(should_dump=True)
    print(str(datetime.datetime.now()) + " , Calculated motifs")
    self._motif_mat = np.hstack((np.asarray(g_ftrs['motif3']._features),
                                 np.asarray(g_ftrs['motif4']._features)))
    if motifs_picked is not None:
        self._motif_mat = self._motif_mat[:, motifs_picked]
def _calc_motif4(self):
    """Return the normalised 4-motif matrix, optionally column-restricted.

    The "motif4" feature is built and dumped with ``GraphFeatures``,
    converted with ``self._to_matrix`` when it is a dict, and passed
    through ``self._log_norm``.  When ``self._motif_choice`` equals
    "All_Motifs" only the columns given by
    ``MotifProbability.get_3_clique_motifs(4)``, shifted down by the
    number of 3-motifs, are returned.

    :return: the normalised matrix or the selected columns of it.
    """
    calculator = nth_nodes_motif(4, gpu=self._gpu, device=self._device)
    graph_features = GraphFeatures(
        self._graph, {"motif4": FeatureMeta(calculator, {"m4"})},
        dir_path=self._dir_path)
    graph_features.build(should_dump=True)

    counts = graph_features['motif4']._features
    matrix = self._to_matrix(counts) if type(counts) is dict else counts
    normalised = self._log_norm(matrix)

    if self._motif_choice != "All_Motifs":
        return normalised

    probability = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['subgraph_size'],
                                   self._params['directed'])
    # The full 3 clique is the last motif 3.
    offset = 1 + probability.get_3_clique_motifs(3)[-1]
    columns = [index - offset
               for index in probability.get_3_clique_motifs(4)]
    return normalised[:, columns]
def _calc_motif3(self):
    """Return the 3-motif matrix, from ``motif3.pkl`` when it exists.

    If ``motif3.pkl`` is present in ``self._dir_path`` the pickled value is
    converted to a matrix (dict via ``self._to_matrix``, a
    ``MotifsNodeCalculator`` via its ``_features``, anything else via
    ``np.array``).  Otherwise the feature is built and dumped with
    ``GraphFeatures`` and passed through ``self._log_norm``.

    When ``self._motif_choice == "All_Motifs"`` only the columns returned
    by ``MotifProbability.get_3_clique_motifs(3)`` are kept.

    :return: the (possibly column-restricted) motif matrix.
    """
    def _pick_columns(matrix):
        # Shared by the cached and the freshly computed path.
        if self._motif_choice != "All_Motifs":
            return matrix
        mp = MotifProbability(self._params['vertices'],
                              self._params['probability'],
                              self._params['clique_size'],
                              self._params['directed'])
        clique_motifs = mp.get_3_clique_motifs(3)
        return matrix[:, clique_motifs]

    # FOR NOW, NO GPU FOR US
    pkl_path = os.path.join(self._dir_path, "motif3.pkl")
    if os.path.exists(pkl_path):
        with open(pkl_path, "rb") as pkl_file:
            pkl3 = pickle.load(pkl_file)
        if isinstance(pkl3, dict):
            motif3 = self._to_matrix(pkl3)
        elif isinstance(pkl3, MotifsNodeCalculator):
            motif3 = np.array(pkl3._features)
        else:
            motif3 = np.array(pkl3)
        # NOTE(review): the cached matrix is returned without _log_norm,
        # whereas the freshly built one below is normalised - confirm this
        # asymmetry is intended.
        return _pick_columns(motif3)

    raw_ftr = GraphFeatures(self._graph, {
        "motif3": FeatureMeta(
            nth_nodes_motif(3, gpu=self._gpu, device=self._device), {"m3"})
    }, dir_path=self._dir_path)
    raw_ftr.build(should_dump=True)
    feature = raw_ftr['motif3']._features
    if isinstance(feature, dict):
        motif_matrix = self._to_matrix(feature)
    else:
        motif_matrix = feature
    return _pick_columns(self._log_norm(motif_matrix))
def _calc_matrix(self):
    """Return a ``{graph name: feature matrix}`` mapping, cached on disk.

    The mapping is pickled as
    ``<base_dir>/pkl/vectors/<database_name>_matrix.pkl``.  When that
    pickle already exists it is loaded and returned without building
    anything.  Otherwise, for every graph of ``self._temporal_graph`` the
    features are built (and dumped) under
    ``<base_dir>/pkl/features/<database_name>/<graph name>`` and converted
    with ``FeaturesProcessor.as_matrix``.

    :return: dict mapping each graph name to its feature matrix.
    """
    params = self._params
    database_name = "_".join([
        self._data_name,
        str(params.max_connected),
        str(params.directed),
    ])
    mat_pkl_path = os.path.join(self._base_dir, "pkl", "vectors",
                                database_name + "_matrix.pkl")

    if os.path.exists(mat_pkl_path):
        self._logger.info("loading pkl file - graph_matrix")
        # The cache is written by this method itself; do not point it at
        # pickles coming from an untrusted source.
        with open(mat_pkl_path, "rb") as pkl_file:
            return pickle.load(pkl_file)

    # create dir for database (no-op when it already exists)
    database_pkl_dir = os.path.join(self._base_dir, "pkl", "features",
                                    database_name)
    os.makedirs(database_pkl_dir, exist_ok=True)

    gnx_to_vec = {}
    for gnx_name, gnx in zip(self._temporal_graph.graph_names(),
                             self._temporal_graph.graphs()):
        # create dir for specific graph features
        gnx_path = os.path.join(database_pkl_dir, gnx_name)
        os.makedirs(gnx_path, exist_ok=True)

        gnx_ftr = GraphFeatures(gnx, params.features, dir_path=gnx_path,
                                logger=self._logger,
                                is_max_connected=params.max_connected)
        # build features
        gnx_ftr.build(should_dump=True,
                      force_build=params.FORCE_REBUILD_FEATURES)
        gnx_to_vec[gnx_name] = FeaturesProcessor(gnx_ftr).as_matrix()

    # Make sure the cache directory exists before writing into it.
    os.makedirs(os.path.dirname(mat_pkl_path), exist_ok=True)
    with open(mat_pkl_path, "wb") as pkl_file:
        pickle.dump(gnx_to_vec, pkl_file)
    return gnx_to_vec
def _calculate_motif_matrix(self):
    """Store the stacked 3- and 4-motif matrix in ``self._motif_mat``.

    When ``self._params["load_motifs"]`` is truthy or ``motif4.pkl`` exists
    in ``self._dir_path``, both ``motif3.pkl`` and ``motif4.pkl`` are read
    from that directory; otherwise the features in
    ``self._motif_features`` are built (and dumped) with
    ``GraphFeatures``.  The two matrices are joined with ``np.hstack``.

    :return: None.
    """
    def _load_pickle(file_name):
        with open(os.path.join(self._dir_path, file_name), "rb") as pkl_file:
            return pickle.load(pkl_file)

    def _as_matrix(loaded):
        # Convert whatever was pickled (dict, calculator, or array-like).
        if isinstance(loaded, dict):
            return self._to_matrix(loaded)
        if isinstance(loaded, MotifsNodeCalculator):
            features = loaded._features
            if isinstance(features, list):
                return np.array(features)
            # NOTE(review): `_to_matrix_` (trailing underscore) is kept as
            # in the original - confirm it exists next to `_to_matrix`.
            return self._to_matrix_(features)
        return np.array(loaded)

    # NOTE(review): only motif4.pkl is checked, yet motif3.pkl is read as
    # well - presumably both are always dumped together; confirm.
    if self._params["load_motifs"] or os.path.exists(
            os.path.join(self._dir_path, 'motif4.pkl')):
        pkl3 = _load_pickle("motif3.pkl")
        pkl4 = _load_pickle("motif4.pkl")
        self._motif_mat = np.hstack((_as_matrix(pkl3), _as_matrix(pkl4)))
        print(str(datetime.datetime.now()) + " , Calculated motifs")
        return

    g_ftrs = GraphFeatures(self._graph, self._motif_features,
                           dir_path=self._dir_path)
    g_ftrs.build(should_dump=True)
    print(str(datetime.datetime.now()) + " , Calculated motifs")
    self._motif_mat = np.hstack((np.asarray(g_ftrs['motif3']._features),
                                 np.asarray(g_ftrs['motif4']._features)))
header = [] for i in range(num_motifs): header.append((motif_type, i)) return header @staticmethod def is_motif(ftr): return ftr == 'motif4' or ftr == "motif3" if __name__ == "__main__": import networkx as nx from feature_meta import NODE_FEATURES gnx = nx.Graph() gnx.add_edges_from([ (1, 2), (1, 3), (2, 3), (2, 7), (7, 8), (3, 6), (4, 6), (6, 8), (5, 6), ]) gnx_ftr = GraphFeatures(gnx, NODE_FEATURES, ".", is_max_connected=True) gnx_ftr.build() m = MotifRatio(gnx_ftr, False) e = 0
class FeatureCalculator:
    """Load a graph from an edge-list file and calculate node features for it."""

    def __init__(self,
                 edge_path,
                 dir_path,
                 features,
                 acc=True,
                 directed=False,
                 gpu=False,
                 device=2,
                 verbose=True,
                 params=None):
        """
        A class used to calculate features for a given graph, input as a text-like file.

        :param edge_path: str
        Path to graph edges file (text-like file, e.g. txt or csv), from which the graph is built using networkx.
        The graph must be unweighted. If its vertices are not [0, 1, ..., n-1], they are mapped to become
        [0, 1, ..., n-1] and the mapping is saved.
        Every row in the edges file should include "source_id,destination_id", without a header row.
        :param dir_path: str
        Path to the directory in which the feature calculations will be (or already are) located.
        :param features: list of strings
        List of the names of each feature. Could be any name from features_meta.py or "additional_features".
        Unknown names are removed from this list in place.
        :param acc: bool
        Whether to run the accelerated features, assuming it is possible to do so.
        :param directed: bool
        Whether the built graph is directed.
        :param gpu: bool
        Whether to use GPUs, assuming it is possible to do so (i.e. the GPU exists and the CUDA matches).
        :param device: int
        If gpu is True, indicates on which GPU device to calculate. Will return error if the index doesn't match the
        available GPUs.
        :param verbose: bool
        Whether to print things indicating the phases of calculations. When False no logger is created.
        :param params: dict, or None
        For clique detection uses, this is a dictionary of the graph settings
        (size, directed, clique size, edge probability). Ignored for any other use.
        """
        self._dir_path = dir_path
        # By their name as appears in accelerated_features_meta
        self._features = features
        self._gpu = gpu
        self._device = device
        self._verbose = verbose
        if verbose:
            self._logger = multi_logger([
                PrintLogger("Logger", level=logging.DEBUG),
                FileLogger("FLogger", path=dir_path, level=logging.INFO)
            ], name=None)
        else:
            self._logger = None
        self._params = params
        self._load_graph(edge_path, directed)
        # acc determines whether to use the accelerated features
        self._get_feature_meta(features, acc)

        self._adj_matrix = None
        self._raw_features = None
        self._other_features = None

    def _load_graph(self, edge_path, directed=False):
        """Read the comma-separated edge list and relabel vertices to 0..n-1.

        ``self._mapping`` always maps the new integer label to the original
        vertex; it is pickled to ``vertices_mapping.pkl`` in
        ``self._dir_path`` only when a relabeling actually takes place.
        """
        self._graph = nx.read_edgelist(
            edge_path,
            delimiter=',',
            create_using=nx.DiGraph() if directed else nx.Graph())
        vertices = np.array(self._graph.nodes)
        should_be_vertices = np.arange(len(vertices))
        self._mapping = dict(enumerate(self._graph))
        if not np.array_equal(vertices, should_be_vertices):
            if self._verbose:
                self._logger.debug("Relabeling vertices to [0, 1, ..., n-1]")
            mapping_path = os.path.join(self._dir_path,
                                        "vertices_mapping.pkl")
            with open(mapping_path, "wb") as mapping_file:
                pickle.dump(self._mapping, mapping_file)
            self._graph = nx.convert_node_labels_to_integers(self._graph)
        if self._verbose:
            self._logger.info(str(datetime.datetime.now()) + " , Loaded graph")
            self._logger.debug("Graph Size: %d Nodes, %d Edges" %
                               (len(self._graph), len(self._graph.edges)))

    def _get_feature_meta(self, features, acc):
        """Split the requested names into regular and special features.

        Regular features end up in ``self._features`` (name -> meta taken
        from ``FeaturesMeta(...).NODE_LEVEL``); 'degree', 'in_degree',
        'out_degree' and 'additional_features' go to
        ``self._special_features``.  Unknown names are dropped from
        ``features`` in place.
        """
        if acc:
            from accelerated_features_meta import FeaturesMeta
            features_meta_kwargs = dict(gpu=self._gpu, device=self._device)
        else:
            from features_meta import FeaturesMeta
            features_meta_kwargs = dict()

        all_node_features = FeaturesMeta(**features_meta_kwargs).NODE_LEVEL
        special_names = ('degree', 'in_degree', 'out_degree',
                         'additional_features')
        self._features = {}
        self._special_features = []
        # Iterate over a snapshot: removing from `features` while iterating
        # it directly would skip the entry after every removed one.
        for key in list(features):
            if key in special_names:
                self._special_features.append(key)
            elif key not in all_node_features:
                if self._verbose:
                    self._logger.debug(
                        "Feature %s unknown, ignoring this feature" % key)
                features.remove(key)
            else:
                self._features[key] = all_node_features[key]

    def calculate_features(self, dumping_specs=None):
        """
        Calculate (and dump) the chosen features.

        :param dumping_specs: A dictionary of specifications how to dump the non-special features.
        The default is saving the class only (as a pickle file). The dictionary is not modified.
        'object': What to save - either 'class' (save the calculator with the features inside),
        'feature' (the feature itself only, saved as name + '_ftr') or 'both'.
        Note that if only the feature is saved, when one calls the calculator again,
        the class will not load the feature and instead calculate it again.
        'file_type': If the feature itself is saved, one can choose between two formats:
        either 'pkl' (save the feature as a pickle file, as is) or 'csv' (save a csv file of the feature values).
        'vertex_names': If the features are saved as a csv file, there is an option of saving the name of each vertex
        in each row, before the feature values. The value here is a boolean indicating whether to put the original
        names the vertices in the beginning of each row.
        """
        # NOTE(review): when nothing was chosen and verbose is False this
        # condition is False, so the calculation below still runs on empty
        # feature sets - kept as in the original.
        nothing_chosen = not (len(self._features) +
                              len(self._special_features))
        if nothing_chosen and self._verbose:
            print("No features were chosen!")
            return

        self._adj_matrix = nx.adjacency_matrix(self._graph)
        # self._adj_matrix = self._adj_matrix.toarray()
        self._raw_features = GraphFeatures(gnx=self._graph,
                                           features=self._features,
                                           dir_path=self._dir_path,
                                           logger=self._logger)
        if dumping_specs is not None:
            # Work on a copy so the caller's dictionary stays untouched.
            dumping_specs = dict(dumping_specs)
            if 'vertex_names' in dumping_specs:
                if dumping_specs['vertex_names']:
                    dumping_specs['vertex_names'] = self._mapping
                else:
                    del dumping_specs['vertex_names']
        self._raw_features.build(should_dump=True,
                                 dumping_specs=dumping_specs)
        self._other_features = OtherFeatures(self._graph,
                                             self._special_features,
                                             self._dir_path, self._params,
                                             self._logger)
        self._other_features.build(should_dump=True)
        # The logger only exists in verbose mode.
        if self._logger is not None:
            self._logger.info(
                str(datetime.datetime.now()) + " , Calculated features")

    @property
    def feature_matrix(self):
        """Regular features followed by the special ones, stacked column-wise.

        Only valid after ``calculate_features`` has run.
        """
        return np.hstack((self._raw_features.to_matrix(mtype=np.array),
                          self._other_features.feature_matrix))

    @property
    def adjacency_matrix(self):
        """Adjacency matrix set by ``calculate_features`` (``None`` before that)."""
        return self._adj_matrix
def _calc_features(self, pkl=True):
    """Fill ``self._features_by_time`` and ``self._multi_graphs_by_time``.

    With ``pkl`` truthy and a cache file named ``self._ftr_pkl_name()``
    present in ``<base_dir>/pkl/ftr_by_time_dictionaries``, both lists are
    loaded from it.  Otherwise the database is loaded and, for every
    multi-graph window, the features of each graph are built and dumped
    under ``<base_dir>/pkl/graph_measures/<database_full_name>/time_<i>/
    <graph name>``.  Each graph is stored as the pair
    ``(motif-ratio vector with log(1 + #nodes) and log(1 + #edges)
    passed as to_add, label)``.
    The two lists are pickled to the cache file at the end.

    :param pkl: whether an existing cache file may be used.
    :return: None.
    """
    cache_dir = os.path.join(self._base_dir, 'pkl',
                             'ftr_by_time_dictionaries')

    # load dictionary if exists
    if pkl:
        cache_path = os.path.join(cache_dir, self._ftr_pkl_name())
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as cache_file:
                self._features_by_time, self._multi_graphs_by_time = \
                    pickle.load(cache_file)
            return

    self._load_database()
    labels = self._database.labels

    # make directory for database
    dir_path = os.path.join(self._base_dir, 'pkl', 'graph_measures',
                            self._params['database_full_name'])
    os.makedirs(dir_path, exist_ok=True)

    # calculate features
    windows = self._database.multi_graph_by_window(
        self._params['window_size'], self._params['start_time'])
    for i, multi_graph in enumerate(windows):
        mg_dir_path = os.path.join(dir_path, "time_" + str(i))
        os.makedirs(mg_dir_path, exist_ok=True)
        ftr_tmp_dict = {}
        # nodes_and_edges = {}
        for name in multi_graph.graph_names():
            gnx_dir_path = os.path.join(mg_dir_path, name)
            os.makedirs(gnx_dir_path, exist_ok=True)
            raw_ftr = GraphFeatures(
                multi_graph.get_gnx(name),
                NODE_FEATURES_ML,
                dir_path=gnx_dir_path,
                is_max_connected=self._params['max_connected'],
                logger=PrintLogger(self._params['database_full_name']))
            raw_ftr.build(should_dump=True)  # build features
            nodes_and_edges = [
                np.log(1 + multi_graph.node_count(graph_id=name)),
                np.log(1 + multi_graph.edge_count(graph_id=name))
            ]
            # nodes_and_edges = [multi_graph.node_count(graph_id=name), multi_graph.edge_count(graph_id=name)]
            # nodes_and_edges[name] = [multi_graph.node_count(graph_id=name), multi_graph.edge_count(graph_id=name)]

            # ====================== motif ratio ========================
            ftr_tmp_dict[name] = (
                FeaturesProcessor(raw_ftr).activate_motif_ratio_vec(
                    to_add=nodes_and_edges), labels[name])

            # ==================== ftr correlation ======================
            # ftr_tmp_dict[name] = (FeaturesProcessor(raw_ftr).as_matrix(norm_func=log_norm))
            # ftr_tmp_dict[name] = (FeaturesProcessor(raw_ftr).as_matrix())

        # concat_mx = np.vstack([mx for name, mx in ftr_tmp_dict.items()])
        # pearson_picker = PearsonFeaturePicker(concat_mx, size=self._params['ftr_pairs'],
        #                                       identical_bar=0.9)
        # best_pairs = pearson_picker.best_pairs()
        # beta = LinearContext(multi_graph, ftr_tmp_dict, best_pairs, window_size=len(ftr_tmp_dict))
        # beta_matrix = beta.beta_matrix()
        # node and edges can be appended here
        # for j, name in enumerate(multi_graph.graph_names()):
        #     ftr_tmp_dict[name] = (np.hstack((beta_matrix[j], nodes_and_edges[name])), labels[name])

        self._features_by_time.append(ftr_tmp_dict)

        multi_graph.suspend_logger()
        self._multi_graphs_by_time.append(multi_graph)

    # Make sure the cache directory exists before writing into it.
    os.makedirs(cache_dir, exist_ok=True)
    cache_path = os.path.join(cache_dir, self._ftr_pkl_name())
    with open(cache_path, "wb") as cache_file:
        pickle.dump((self._features_by_time, self._multi_graphs_by_time),
                    cache_file)