def _calc_motif3(self):
     """Build the 3-node motif features of ``self._graph`` and normalise them.

     The motifs are computed through ``GraphFeatures`` (built with
     ``should_dump=True``), converted to a matrix when they come back as a
     dict, and passed through ``self._log_norm``.

     Returns:
         The normalised motif matrix. When ``self._motif_choice`` is
         ``"All_Motifs"`` only the columns listed by
         ``MotifProbability.get_3_clique_motifs(3)`` are returned.
     """
     motif_meta = FeatureMeta(
         nth_nodes_motif(3, gpu=self._gpu, device=self._device), {"m3"})
     raw_ftr = GraphFeatures(self._graph, {"motif3": motif_meta},
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif3']._features
     # isinstance (rather than an exact type comparison) also routes dict
     # subclasses through the dict-to-matrix conversion.
     if isinstance(feature, dict):
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice != "All_Motifs":
         return normed_matrix
     mp = MotifProbability(self._params['vertices'], self._params['probability'],
                           self._params['subgraph_size'], self._params['directed'])
     clique_motifs = mp.get_3_clique_motifs(3)
     return normed_matrix[:, clique_motifs]
 def _calc_motif4(self):
     """Build the 4-node motif features of ``self._graph`` and normalise them.

     The motifs are computed through ``GraphFeatures`` (built with
     ``should_dump=True``), converted to a matrix when they come back as a
     dict, and passed through ``self._log_norm``.

     Returns:
         The normalised motif matrix. When ``self._motif_choice`` is
         ``"All_Motifs"`` only the columns listed by
         ``MotifProbability.get_3_clique_motifs(4)``, each shifted down by
         the motif-3 count, are returned.
     """
     motif_meta = FeatureMeta(
         nth_nodes_motif(4, gpu=self._gpu, device=self._device), {"m4"})
     raw_ftr = GraphFeatures(self._graph, {"motif4": motif_meta},
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif4']._features
     # isinstance (rather than an exact type comparison) also routes dict
     # subclasses through the dict-to-matrix conversion.
     if isinstance(feature, dict):
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice != "All_Motifs":
         return normed_matrix
     mp = MotifProbability(self._params['vertices'], self._params['probability'],
                           self._params['subgraph_size'], self._params['directed'])
     # The full 3 clique is the last motif 3, so its index + 1 is the offset
     # subtracted from the motif-4 indices below.
     motif3_count = 1 + mp.get_3_clique_motifs(3)[-1]
     clique_motifs = [m - motif3_count for m in mp.get_3_clique_motifs(4)]
     return normed_matrix[:, clique_motifs]
# Example #3
# 0
 def _calc_additional_features(self):
     # MUST BE AFTER CALCULATING MOTIFS
     if "Motif_3" not in self._features:
         raise KeyError(
             "Motifs must be calculated prior to the additional features")
     else:
         motif_index_in_features = self._features.index("Motif_3")
         motif_matrix = self._feature_matrix[motif_index_in_features]
         mp = MotifProbability(self._params['vertices'],
                               self._params['probability'],
                               self._params['subgraph_size'],
                               self._params['directed'])
         motif3_count = 1 + mp.get_3_clique_motifs(3)[
             -1]  # The full 3 clique is the last motif 3.
         add_ftrs = AdditionalFeatures(self._params,
                                       self._graph,
                                       motif_matrix,
                                       motifs=list(range(motif3_count)))
     return self._log_norm(add_ftrs.calculate_extra_ftrs())
 def _calc_motif4(self):
     # FOR NOW, NO GPU FOR US
     if os.path.exists(os.path.join(self._dir_path, "motif4.pkl")):
         pkl4 = pickle.load(
             open(os.path.join(self._dir_path, "motif4.pkl"), "rb"))
         if type(pkl4) == dict:
             motif4 = self._to_matrix(pkl4)
         elif type(pkl4) == MotifsNodeCalculator:
             motif4 = np.array(pkl4._features)
         else:
             motif4 = np.array(pkl4)
         if self._motif_choice == "All_Motifs":
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             motif3_count = 1 + mp.get_3_clique_motifs(3)[
                 -1]  # The full 3 clique is the last motif 3.
             clique_motifs = [
                 m - motif3_count for m in mp.get_3_clique_motifs(4)
             ]
             return motif4[:, clique_motifs]
         else:
             return motif4
     raw_ftr = GraphFeatures(self._graph, {
         "motif4":
         FeatureMeta(nth_nodes_motif(4, gpu=self._gpu, device=self._device),
                     {"m4"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif4']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'],
                               self._params['probability'],
                               self._params['clique_size'],
                               self._params['directed'])
         motif3_count = 1 + mp.get_3_clique_motifs(3)[
             -1]  # The full 3 clique is the last motif 3.
         clique_motifs = [
             m - motif3_count for m in mp.get_3_clique_motifs(4)
         ]
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
 def _calc_additional_features(self):
     # MUST BE AFTER CALCULATING MOTIFS
     if self._motif_choice is None:
         raise KeyError(
             "Motifs must be calculated prior to the additional features")
     else:
         if self._motif_choice == "All_Motifs":
             motif_matrix = np.hstack(
                 (pickle.load(
                     open(os.path.join(self._dir_path, "Motif_3.pkl"),
                          "rb")),
                  pickle.load(
                      open(os.path.join(self._dir_path, "Motif_4.pkl"),
                           "rb"))))
             add_ftrs = AdditionalFeatures(self._params, self._graph,
                                           self._dir_path, motif_matrix)
         elif self._motif_choice == "Motif_3":
             motif_matrix = pickle.load(
                 open(os.path.join(self._dir_path, "Motif_3.pkl"), "rb"))
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             motif3_count = 1 + mp.get_3_clique_motifs(3)[
                 -1]  # The full 3 clique is the last motif 3.
             add_ftrs = AdditionalFeatures(self._params,
                                           self._graph,
                                           self._dir_path,
                                           motif_matrix,
                                           motifs=list(range(motif3_count)))
         else:
             motif_matrix = np.hstack(
                 (pickle.load(
                     open(os.path.join(self._dir_path, "Motif_3.pkl"),
                          "rb")),
                  pickle.load(
                      open(os.path.join(self._dir_path, "Motif_4.pkl"),
                           "rb"))))
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             motif3_count = 1 + mp.get_3_clique_motifs(3)[
                 -1]  # The full 3 clique is the last motif 3.
             motif4_count = 1 + mp.get_3_clique_motifs(4)[
                 -1]  # The full 4 clique is the last motif 4.
             add_ftrs = AdditionalFeatures(
                 self._params,
                 self._graph,
                 self._dir_path,
                 motif_matrix,
                 motifs=list(range(motif3_count, motif4_count)))
     return add_ftrs.calculate_extra_ftrs()
def aggregate_results():
    """Export expected and measured motif counts to CSV files.

    For every combination of ``SIZES``, ``DIRECTED`` and motif level (3, 4)
    this writes, under ``results_2/size{size}_p{PROB}_directed{...}_motif{level}``:

    * ``expected_motifs.csv``: one row with
      ``MotifProbability.motif_expected_non_clique_vertex`` for every index
      returned by ``get_motif_indices(level, is_directed)``;
    * ``run_{run}/motifs.csv``: for each of the ``NUM_RUNS`` runs, the
      pickled result ``motifs_gpu/motif{level}.pkl`` of that run converted
      with ``res_to_matrix``.

    Both CSV files are written without header and without index.
    """
    # Assuming validation is complete.
    from additional_features import MotifProbability
    from generate_graphs_2 import SIZES, PROB, DIRECTED, NUM_RUNS

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("results_2", exist_ok=True)
    for size, is_directed, level in itertools.product(SIZES, DIRECTED, [3, 4]):
        dump_dirname = os.path.join(
            "results_2",
            f"size{size}_p{PROB}_directed{is_directed}_motif{level}")
        os.makedirs(dump_dirname, exist_ok=True)
        mp = MotifProbability(size=size,
                              edge_probability=PROB,
                              clique_size=0,
                              directed=is_directed)
        expected_motifs = pd.DataFrame([
            mp.motif_expected_non_clique_vertex(motif_index=idx)
            for idx in get_motif_indices(level, is_directed)
        ]).T
        expected_motifs.to_csv(os.path.join(dump_dirname,
                                            "expected_motifs.csv"),
                               header=None,
                               index=None)
        dirname = f"size{size}_p{PROB}_directed{is_directed}_runs"
        for run in range(NUM_RUNS):
            run_name = f"run_{run}"
            pkl_path = os.path.join(dirname, run_name, "motifs_gpu",
                                    f"motif{level}.pkl")
            # Close the pickle file as soon as it is read instead of
            # leaking one handle per run.
            with open(pkl_path, "rb") as pkl_file:
                raw_result = pickle.load(pkl_file)
            res_df = pd.DataFrame(res_to_matrix(raw_result))
            run_dump_dir = os.path.join(dump_dirname, run_name)
            os.makedirs(run_dump_dir, exist_ok=True)
            res_df.to_csv(os.path.join(run_dump_dir, "motifs.csv"),
                          header=None,
                          index=None)
 def _calc_motif3(self):
     # FOR NOW, NO GPU FOR US
     if os.path.exists(os.path.join(self._dir_path, "motif3.pkl")):
         pkl3 = pickle.load(
             open(os.path.join(self._dir_path, "motif3.pkl"), "rb"))
         if type(pkl3) == dict:
             motif3 = self._to_matrix(pkl3)
         elif type(pkl3) == MotifsNodeCalculator:
             motif3 = np.array(pkl3._features)
         else:
             motif3 = np.array(pkl3)
         if self._motif_choice == "All_Motifs":
             mp = MotifProbability(self._params['vertices'],
                                   self._params['probability'],
                                   self._params['clique_size'],
                                   self._params['directed'])
             clique_motifs = mp.get_3_clique_motifs(3)
             return motif3[:, clique_motifs]
         else:
             return motif3
     raw_ftr = GraphFeatures(self._graph, {
         "motif3":
         FeatureMeta(nth_nodes_motif(3, gpu=self._gpu, device=self._device),
                     {"m3"})
     },
                             dir_path=self._dir_path)
     raw_ftr.build(should_dump=True)
     feature = raw_ftr['motif3']._features
     if type(feature) == dict:
         motif_matrix = self._to_matrix(feature)
     else:
         motif_matrix = feature
     normed_matrix = self._log_norm(motif_matrix)
     if self._motif_choice == "All_Motifs":
         mp = MotifProbability(self._params['vertices'],
                               self._params['probability'],
                               self._params['clique_size'],
                               self._params['directed'])
         clique_motifs = mp.get_3_clique_motifs(3)
         return normed_matrix[:, clique_motifs]
     else:
         return normed_matrix
if __name__ == '__main__':
    # Assuming validation is complete.
    from additional_features import MotifProbability
    from generate_graphs_2 import SIZES, PROB, DIRECTED, NUM_RUNS

    if not os.path.exists("results_2"):
        os.mkdir("results_2")
    for size, is_directed, level in itertools.product(SIZES, DIRECTED, [3, 4]):
        dump_dirname = os.path.join(
            "results_2",
            f"size{size}_p{PROB}_directed{is_directed}_motif{level}")
        if not os.path.exists(dump_dirname):
            os.mkdir(dump_dirname)
        mp = MotifProbability(size=size,
                              edge_probability=PROB,
                              clique_size=0,
                              directed=is_directed)

        with open(os.path.join(dump_dirname, "expected_motifs.csv"),
                  newline='') as expected_motifs_file:
            for row in expected_motifs_file:
                expected_motifs = row
        expected_motifs = list(expected_motifs.split(","))
        for i in range(len(expected_motifs)):
            expected_motifs[i] = float(expected_motifs[i])

        dirname = f"size{size}_p{PROB}_directed{is_directed}_runs"
        motifs = []
        for run in range(NUM_RUNS):
            with open(os.path.join(dump_dirname, f"run_{run}", "motifs.csv"),
                      newline='') as motifs_file: