def _calc_motif3(self):
    """Compute the log-normalised 3-node motif feature matrix.

    Builds the "motif3" feature through ``GraphFeatures`` (with
    ``should_dump=True`` and ``dir_path=self._dir_path``), converts a dict
    result into a matrix with ``self._to_matrix`` and normalises it with
    ``self._log_norm``.

    Returns:
        The normalised matrix. When ``self._motif_choice`` is "All_Motifs",
        only the columns whose indices are returned by
        ``MotifProbability.get_3_clique_motifs(3)`` are kept.
    """
    raw_ftr = GraphFeatures(
        self._graph,
        {
            "motif3": FeatureMeta(
                nth_nodes_motif(3, gpu=self._gpu, device=self._device),
                {"m3"})
        },
        dir_path=self._dir_path)
    raw_ftr.build(should_dump=True)
    feature = raw_ftr['motif3']._features

    # isinstance (instead of an exact type comparison) also sends dict
    # subclasses through _to_matrix rather than treating them as a matrix.
    if isinstance(feature, dict):
        motif_matrix = self._to_matrix(feature)
    else:
        motif_matrix = feature
    normed_matrix = self._log_norm(motif_matrix)

    if self._motif_choice != "All_Motifs":
        return normed_matrix

    mp = MotifProbability(self._params['vertices'],
                          self._params['probability'],
                          self._params['subgraph_size'],
                          self._params['directed'])
    clique_motifs = mp.get_3_clique_motifs(3)
    return normed_matrix[:, clique_motifs]
def _calc_motif4(self):
    """Compute the log-normalised 4-node motif feature matrix.

    Builds the "motif4" feature through ``GraphFeatures`` (with
    ``should_dump=True`` and ``dir_path=self._dir_path``), converts a dict
    result into a matrix with ``self._to_matrix`` and normalises it with
    ``self._log_norm``.

    Returns:
        The normalised matrix. When ``self._motif_choice`` is "All_Motifs",
        only the columns derived from
        ``MotifProbability.get_3_clique_motifs(4)`` are kept.
    """
    raw_ftr = GraphFeatures(
        self._graph,
        {
            "motif4": FeatureMeta(
                nth_nodes_motif(4, gpu=self._gpu, device=self._device),
                {"m4"})
        },
        dir_path=self._dir_path)
    raw_ftr.build(should_dump=True)
    feature = raw_ftr['motif4']._features

    # isinstance (instead of an exact type comparison) also sends dict
    # subclasses through _to_matrix rather than treating them as a matrix.
    if isinstance(feature, dict):
        motif_matrix = self._to_matrix(feature)
    else:
        motif_matrix = feature
    normed_matrix = self._log_norm(motif_matrix)

    if self._motif_choice != "All_Motifs":
        return normed_matrix

    mp = MotifProbability(self._params['vertices'],
                          self._params['probability'],
                          self._params['subgraph_size'],
                          self._params['directed'])
    # The full 3 clique is the last motif 3.
    motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
    # Shift the indices down by motif3_count before using them as columns.
    # NOTE(review): presumably the 4-motif indices are numbered after the
    # 3-motifs while this matrix holds only 4-motif columns -- confirm.
    clique_motifs = [m - motif3_count for m in mp.get_3_clique_motifs(4)]
    return normed_matrix[:, clique_motifs]
def _calc_additional_features(self):
    """Compute the log-normalised additional features from the Motif_3 matrix.

    Looks up the position of "Motif_3" in ``self._features``, takes the entry
    at that position from ``self._feature_matrix`` and passes it to
    ``AdditionalFeatures`` together with the motif indices
    ``0 .. motif3_count - 1``.

    Returns:
        ``self._log_norm`` applied to ``calculate_extra_ftrs()``.

    Raises:
        KeyError: if "Motif_3" is not in ``self._features``.
    """
    # MUST BE AFTER CALCULATING MOTIFS
    if "Motif_3" not in self._features:
        raise KeyError(
            "Motifs must be calculated prior to the additional features")

    motif_position = self._features.index("Motif_3")
    motif_matrix = self._feature_matrix[motif_position]

    probability = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['subgraph_size'],
                                   self._params['directed'])
    # The full 3 clique is the last motif 3.
    last_motif3 = probability.get_3_clique_motifs(3)[-1]
    motif3_count = last_motif3 + 1

    extra = AdditionalFeatures(self._params,
                               self._graph,
                               motif_matrix,
                               motifs=list(range(motif3_count)))
    return self._log_norm(extra.calculate_extra_ftrs())
def _calc_motif4(self):
    """Return the 4-node motif matrix, loading ``motif4.pkl`` if it exists.

    If ``motif4.pkl`` exists under ``self._dir_path`` it is unpickled and
    converted to a matrix (dict -> ``self._to_matrix``,
    ``MotifsNodeCalculator`` -> its ``_features``, anything else ->
    ``np.array``). Otherwise the feature is built through ``GraphFeatures``
    and normalised with ``self._log_norm``.

    Returns:
        The motif matrix. When ``self._motif_choice`` is "All_Motifs", only
        the columns derived from ``MotifProbability.get_3_clique_motifs(4)``
        are kept.
    """
    # FOR NOW, NO GPU FOR US
    cache_path = os.path.join(self._dir_path, "motif4.pkl")
    if os.path.exists(cache_path):
        # Context manager so the file handle is closed after unpickling.
        # NOTE: pickle executes arbitrary code; only load trusted files.
        with open(cache_path, "rb") as cache_file:
            cached = pickle.load(cache_file)
        if isinstance(cached, dict):
            motif_matrix = self._to_matrix(cached)
        elif isinstance(cached, MotifsNodeCalculator):
            motif_matrix = np.array(cached._features)
        else:
            motif_matrix = np.array(cached)
        # NOTE(review): the cached matrix is returned without _log_norm,
        # unlike the freshly built one below -- confirm this is intended.
    else:
        raw_ftr = GraphFeatures(
            self._graph,
            {
                "motif4": FeatureMeta(
                    nth_nodes_motif(4, gpu=self._gpu, device=self._device),
                    {"m4"})
            },
            dir_path=self._dir_path)
        raw_ftr.build(should_dump=True)
        feature = raw_ftr['motif4']._features
        if isinstance(feature, dict):
            feature = self._to_matrix(feature)
        motif_matrix = self._log_norm(feature)

    if self._motif_choice != "All_Motifs":
        return motif_matrix

    mp = MotifProbability(self._params['vertices'],
                          self._params['probability'],
                          self._params['clique_size'],
                          self._params['directed'])
    # The full 3 clique is the last motif 3.
    motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
    # Shift the indices down by motif3_count before using them as columns.
    clique_motifs = [m - motif3_count for m in mp.get_3_clique_motifs(4)]
    return motif_matrix[:, clique_motifs]
def _calc_additional_features(self):
    """Compute the additional features from the pickled motif matrices.

    Loads ``Motif_3.pkl`` (and, unless ``self._motif_choice`` is "Motif_3",
    also ``Motif_4.pkl``, horizontally stacked after it) from
    ``self._dir_path`` and passes the matrix to ``AdditionalFeatures``:

    * "All_Motifs": no ``motifs`` argument is passed.
    * "Motif_3": ``motifs`` is ``range(motif3_count)``.
    * anything else: ``motifs`` is ``range(motif3_count, motif4_count)``.

    Returns:
        The result of ``AdditionalFeatures.calculate_extra_ftrs()``.

    Raises:
        KeyError: if ``self._motif_choice`` is None.
    """
    # MUST BE AFTER CALCULATING MOTIFS
    if self._motif_choice is None:
        raise KeyError(
            "Motifs must be calculated prior to the additional features")

    def load_pickle(file_name):
        # Context manager so the file handle is closed after unpickling.
        # NOTE: pickle executes arbitrary code; only load trusted files.
        with open(os.path.join(self._dir_path, file_name), "rb") as pkl_file:
            return pickle.load(pkl_file)

    if self._motif_choice == "All_Motifs":
        motif_matrix = np.hstack(
            (load_pickle("Motif_3.pkl"), load_pickle("Motif_4.pkl")))
        add_ftrs = AdditionalFeatures(self._params, self._graph,
                                      self._dir_path, motif_matrix)
    elif self._motif_choice == "Motif_3":
        motif_matrix = load_pickle("Motif_3.pkl")
        mp = MotifProbability(self._params['vertices'],
                              self._params['probability'],
                              self._params['clique_size'],
                              self._params['directed'])
        # The full 3 clique is the last motif 3.
        motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
        add_ftrs = AdditionalFeatures(self._params,
                                      self._graph,
                                      self._dir_path,
                                      motif_matrix,
                                      motifs=list(range(motif3_count)))
    else:
        motif_matrix = np.hstack(
            (load_pickle("Motif_3.pkl"), load_pickle("Motif_4.pkl")))
        mp = MotifProbability(self._params['vertices'],
                              self._params['probability'],
                              self._params['clique_size'],
                              self._params['directed'])
        # The full 3 clique is the last motif 3.
        motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
        # The full 4 clique is the last motif 4.
        motif4_count = 1 + mp.get_3_clique_motifs(4)[-1]
        add_ftrs = AdditionalFeatures(
            self._params,
            self._graph,
            self._dir_path,
            motif_matrix,
            motifs=list(range(motif3_count, motif4_count)))
    return add_ftrs.calculate_extra_ftrs()
def aggregate_results():
    """Write expected and measured motif counts as CSV files under ``results_2``.

    For every combination of ``SIZES``, ``DIRECTED`` and motif level (3, 4):

    * creates ``results_2/size{size}_p{PROB}_directed{is_directed}_motif{level}``;
    * writes ``expected_motifs.csv`` from
      ``MotifProbability.motif_expected_non_clique_vertex`` evaluated at each
      index returned by ``get_motif_indices(level, is_directed)``;
    * for each of the ``NUM_RUNS`` runs, unpickles
      ``size{size}_p{PROB}_directed{is_directed}_runs/run_{run}/motifs_gpu/motif{level}.pkl``,
      converts it with ``res_to_matrix`` and writes it to
      ``run_{run}/motifs.csv`` inside the dump directory.
    """
    # Assuming validation is complete.
    from additional_features import MotifProbability
    from generate_graphs_2 import SIZES, PROB, DIRECTED, NUM_RUNS

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs("results_2", exist_ok=True)
    for size, is_directed, level in itertools.product(SIZES, DIRECTED, [3, 4]):
        dump_dirname = os.path.join(
            "results_2",
            f"size{size}_p{PROB}_directed{is_directed}_motif{level}")
        os.makedirs(dump_dirname, exist_ok=True)

        mp = MotifProbability(size=size,
                              edge_probability=PROB,
                              clique_size=0,
                              directed=is_directed)
        expected_motifs = pd.DataFrame([
            mp.motif_expected_non_clique_vertex(motif_index=idx)
            for idx in get_motif_indices(level, is_directed)
        ]).T
        expected_motifs.to_csv(os.path.join(dump_dirname,
                                            "expected_motifs.csv"),
                               header=None,
                               index=None)

        dirname = f"size{size}_p{PROB}_directed{is_directed}_runs"
        for run in range(NUM_RUNS):
            motif_path = os.path.join(dirname, f"run_{run}", "motifs_gpu",
                                      f"motif{level}.pkl")
            # Context manager so the file handle is closed after unpickling.
            # NOTE: pickle executes arbitrary code; only load trusted files.
            with open(motif_path, "rb") as motif_file:
                run_result = pickle.load(motif_file)
            res_df = pd.DataFrame(res_to_matrix(run_result))

            run_dirname = os.path.join(dump_dirname, f"run_{run}")
            os.makedirs(run_dirname, exist_ok=True)
            res_df.to_csv(os.path.join(run_dirname, "motifs.csv"),
                          header=None,
                          index=None)
def _calc_motif3(self):
    """Return the 3-node motif matrix, loading ``motif3.pkl`` if it exists.

    If ``motif3.pkl`` exists under ``self._dir_path`` it is unpickled and
    converted to a matrix (dict -> ``self._to_matrix``,
    ``MotifsNodeCalculator`` -> its ``_features``, anything else ->
    ``np.array``). Otherwise the feature is built through ``GraphFeatures``
    and normalised with ``self._log_norm``.

    Returns:
        The motif matrix. When ``self._motif_choice`` is "All_Motifs", only
        the columns whose indices are returned by
        ``MotifProbability.get_3_clique_motifs(3)`` are kept.
    """
    # FOR NOW, NO GPU FOR US
    cache_path = os.path.join(self._dir_path, "motif3.pkl")
    if os.path.exists(cache_path):
        # Context manager so the file handle is closed after unpickling.
        # NOTE: pickle executes arbitrary code; only load trusted files.
        with open(cache_path, "rb") as cache_file:
            cached = pickle.load(cache_file)
        if isinstance(cached, dict):
            motif_matrix = self._to_matrix(cached)
        elif isinstance(cached, MotifsNodeCalculator):
            motif_matrix = np.array(cached._features)
        else:
            motif_matrix = np.array(cached)
        # NOTE(review): the cached matrix is returned without _log_norm,
        # unlike the freshly built one below -- confirm this is intended.
    else:
        raw_ftr = GraphFeatures(
            self._graph,
            {
                "motif3": FeatureMeta(
                    nth_nodes_motif(3, gpu=self._gpu, device=self._device),
                    {"m3"})
            },
            dir_path=self._dir_path)
        raw_ftr.build(should_dump=True)
        feature = raw_ftr['motif3']._features
        if isinstance(feature, dict):
            feature = self._to_matrix(feature)
        motif_matrix = self._log_norm(feature)

    if self._motif_choice != "All_Motifs":
        return motif_matrix

    mp = MotifProbability(self._params['vertices'],
                          self._params['probability'],
                          self._params['clique_size'],
                          self._params['directed'])
    clique_motifs = mp.get_3_clique_motifs(3)
    return motif_matrix[:, clique_motifs]
if __name__ == '__main__': # Assuming validation is complete. from additional_features import MotifProbability from generate_graphs_2 import SIZES, PROB, DIRECTED, NUM_RUNS if not os.path.exists("results_2"): os.mkdir("results_2") for size, is_directed, level in itertools.product(SIZES, DIRECTED, [3, 4]): dump_dirname = os.path.join( "results_2", f"size{size}_p{PROB}_directed{is_directed}_motif{level}") if not os.path.exists(dump_dirname): os.mkdir(dump_dirname) mp = MotifProbability(size=size, edge_probability=PROB, clique_size=0, directed=is_directed) with open(os.path.join(dump_dirname, "expected_motifs.csv"), newline='') as expected_motifs_file: for row in expected_motifs_file: expected_motifs = row expected_motifs = list(expected_motifs.split(",")) for i in range(len(expected_motifs)): expected_motifs[i] = float(expected_motifs[i]) dirname = f"size{size}_p{PROB}_directed{is_directed}_runs" motifs = [] for run in range(NUM_RUNS): with open(os.path.join(dump_dirname, f"run_{run}", "motifs.csv"), newline='') as motifs_file: