Example #1
    def __init__(self,
                 graphs: MultiGraph,
                 dict_mx,
                 feature_pairs,
                 window_size=None):
        self._interval = int(
            graphs.number_of_graphs()) if window_size is None else window_size
        self._all_features = []
        self._dict_mx = dict_mx
        for graph in graphs.graph_names():
            m = self._dict_mx[graph]
            # self._nodes_for_graph.append(m.shape[0])
            # append graph features
            self._all_features.append(m)
            # append 0.001 for all missing nodes
            self._all_features.append(
                np.ones((graphs.node_count(graphs.name_to_index(graph)) -
                         m.shape[0], m.shape[1])) * 0.001)
        # create one big matrix of everything - rows: nodes, columns: features
        self._all_features = np.concatenate(self._all_features)

        # all_ftr_graph_index - [ .... last_row_index_for_graph_i ... ]
        self._all_ftr_graph_index = np.cumsum([0] +
                                              graphs.node_count()).tolist()
        super(LinearContext, self).__init__(graphs, feature_pairs)
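
The constructor above stacks the per-graph feature matrices row-wise, pads every missing node with a constant 0.001 row, and keeps a cumulative node-count index so the rows belonging to graph i can be recovered later. A minimal standalone sketch of that bookkeeping with made-up matrices (names and shapes below are illustrative, not taken from the real dict_mx):

import numpy as np

# illustrative per-graph feature matrices - rows: nodes, columns: features
feature_mxs = [np.zeros((3, 4)), np.ones((5, 4)), np.full((2, 4), 0.5)]
node_counts = [m.shape[0] for m in feature_mxs]

all_features = np.concatenate(feature_mxs)           # one big (10, 4) matrix
graph_index = np.cumsum([0] + node_counts).tolist()  # [0, 3, 8, 10]

# the rows of graph i live in all_features[graph_index[i]:graph_index[i + 1]]
rows_of_graph_1 = all_features[graph_index[1]:graph_index[2]]
assert rows_of_graph_1.shape == (5, 4)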
Example #2
    def _build_multi_graphs(self):
        time_mg_dict = {}
        for time, mg_dict in self._edge_list_dict.items():
            # create a multi-graph for each time and order its subgraphs
            # according to self._func_order (FIFO)
            mg = MultiGraph(time, graphs_source=mg_dict, directed=self._directed)
            mg.sort_by(self._func_order)
            time_mg_dict[time] = mg
        return time_mg_dict
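
Example #2 builds one MultiGraph per time step from a nested edge-list dictionary and orders its subgraphs with a key function. A small sketch of the data layout the loop expects; the values are illustrative, the {graph_name: [(src, dst), ...]} shape follows Example #4, and passing a key function to sort_by follows Example #6:

# illustrative edge-list dictionary: time -> {graph_name: [(src, dst), ...]}
edge_list_dict = {
    "2019-01": {"g0": [("a", "b"), ("b", "c")], "g1": [("c", "d")]},
    "2019-02": {"g2": [("d", "e")]},
}

# hypothetical stand-in for self._func_order: order subgraph names by their
# numeric suffix so earlier subgraphs come first (FIFO)
def func_order(graph_name):
    return int(graph_name.lstrip("g"))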
Example #3
    def create_multigraph(self, mg_dict):
        mg = MultiGraph("dblp", graphs_source=mg_dict)
        mg.sort_by(self.sort_gnx)
        # suspend the logger while the multi-graph is pickled, then wake it again
        mg.suspend_logger()
        check("./dataset/" + str(self.dataset_name) + "/pkl/")
        pickle.dump(
            mg,
            open("./dataset/" + str(self.dataset_name) + "/pkl/mg_" +
                 self.dataset_name + ".pkl", "wb"))
        mg.wake_logger()
        return mg
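
The suspend_logger / wake_logger pair brackets the pickle.dump call, so the cached multi-graph can later be read back and its logger re-enabled. A hypothetical counterpart that loads the file written above (the path assumes dataset_name == "dblp"):

import pickle

# load the cached MultiGraph back from disk and re-enable its logger
path = "./dataset/dblp/pkl/mg_dblp.pkl"
with open(path, "rb") as f:
    mg = pickle.load(f)
mg.wake_logger()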
Example #4
    def _build_multi_graph(self):
        path_pkl = os.path.join(
            self._base_dir, PKL_DIR, self._params.DATASET_NAME + "_split_" +
            str(self._params.PERCENTAGE) + "_mg.pkl")
        if os.path.exists(path_pkl):
            return pickle.load(open(path_pkl, "rb"))
        multi_graph_dict = {}
        labels = {}
        label_to_idx = {}
        # open basic data csv (with all edges of all times)
        data_df = pd.read_csv(self._src_file_path)
        stop = data_df.shape[0] * self._params.PERCENTAGE

        for index, edge in data_df.iterrows():
            if index > stop:
                break
            # write edge to dictionary
            graph_id = str(edge[self._params.GRAPH_NAME_COL])
            src = str(edge[self._params.SRC_COL])
            dst = str(edge[self._params.DST_COL])
            multi_graph_dict[graph_id] = multi_graph_dict.get(
                graph_id, []) + [(src, dst)]
            label = edge[self._params.LABEL_COL]
            # assign each label a consecutive index in order of first appearance
            if label not in label_to_idx:
                label_to_idx[label] = len(label_to_idx)
            labels[graph_id] = label_to_idx[label]

        mg = MultiGraph(self._params.DATASET_NAME,
                        graphs_source=multi_graph_dict,
                        directed=self._params.DIRECTED,
                        logger=self._logger)
        # inverse mapping: position i holds the label whose index is i
        idx_to_label = sorted(label_to_idx, key=label_to_idx.get)
        mg.suspend_logger()
        pickle.dump((mg, labels, label_to_idx, idx_to_label),
                    open(path_pkl, "wb"))
        mg.wake_logger()
        return mg, labels, label_to_idx, idx_to_label
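
The label bookkeeping above assigns each label a consecutive index in order of first appearance and then inverts that mapping into idx_to_label. A standalone sketch of just that part, on made-up rows:

# made-up (graph_id, label) rows standing in for the CSV columns
rows = [("g1", "spam"), ("g2", "ham"), ("g3", "spam")]

labels, label_to_idx = {}, {}
for graph_id, label in rows:
    label_to_idx.setdefault(label, len(label_to_idx))  # index by first appearance
    labels[graph_id] = label_to_idx[label]
idx_to_label = sorted(label_to_idx, key=label_to_idx.get)

assert labels == {"g1": 0, "g2": 1, "g3": 0}
assert idx_to_label == ["spam", "ham"]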
Example #5
    def multi_graph_by_window(self, window_size=None, start_time=0):
        if start_time < 0 or start_time > self._number_of_times:
            self._logger.error("invalid start time = " + str(start_time) + ", total intervals = " + str(self._number_of_times))
            return
        # build the base multi-graph from the first interval
        mg = MultiGraph(self._database_name + "window", graphs_source=self._edge_list_dict[self._times[0]],
                        directed=self._directed)
        # accumulate every interval before start_time into the base multi-graph
        for i in range(1, start_time):
            mg.add_edges(self._edge_list_dict[self._times[i]])

        window_size = window_size if window_size else self._number_of_times
        for i in range(start_time, self._number_of_times):
            # suspend the logger around the deep copy and yield an independent snapshot
            mg.suspend_logger()
            temp = copy.deepcopy(mg)
            mg.wake_logger()
            yield temp

            # slide the window: drop the interval that fell out, then add interval i
            to_remove = i - window_size
            if to_remove >= 0:
                mg.remove_edges(self._edge_list_dict[self._times[to_remove]])
            mg.add_edges(self._edge_list_dict[self._times[i]])
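
multi_graph_by_window is a generator: each iteration yields a snapshot, then drops the interval that fell out of the window and adds the next one. A purely illustrative sketch of the same sliding-window bookkeeping with plain edge sets instead of MultiGraph objects:

# illustrative time line and per-interval edge sets
times = ["t0", "t1", "t2", "t3"]
edges_at = {"t0": {(0, 1)}, "t1": {(1, 2)}, "t2": {(2, 3)}, "t3": {(3, 4)}}

def windows(window_size, start_time=1):
    current = set()
    for i in range(start_time):                 # pre-load intervals before start_time
        current |= edges_at[times[i]]
    for i in range(start_time, len(times)):
        yield set(current)                      # snapshot before interval i is added
        drop = i - window_size
        if drop >= 0:                           # the oldest interval falls out of the window
            current -= edges_at[times[drop]]
        current |= edges_at[times[i]]

snapshots = list(windows(window_size=2, start_time=1))
# snapshots[0] == {(0, 1)}; later snapshots hold at most the two newest intervals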
Example #6
    def to_multi_graph(self):
        mg = MultiGraph(self._database_name, graphs_source=self._mg_dictionary,
                        directed=self._directed)
        # sort the subgraphs chronologically by parsing each graph name as a timestamp
        mg.sort_by(lambda x: datetime.strptime(x, self._format_out))
        return mg
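
sort_by here receives a key function that parses each graph name as a timestamp, so the subgraphs end up in chronological order. A standalone illustration of that key, assuming the names are date strings and _format_out is a strptime format such as "%Y-%m":

from datetime import datetime

format_out = "%Y-%m"  # assumed format; the real value comes from self._format_out
names = ["2020-03", "2019-12", "2020-01"]
ordered = sorted(names, key=lambda x: datetime.strptime(x, format_out))
assert ordered == ["2019-12", "2020-01", "2020-03"]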