def __init__(self, graphs: MultiGraph, dict_mx, feature_pairs, window_size=None):
    """Build a single feature matrix covering every node of every sub-graph.

    :param graphs: MultiGraph whose per-graph feature matrices are stacked.
    :param dict_mx: mapping graph-name -> 2D feature matrix (rows = nodes,
        columns = features) — presumably numpy arrays; shapes inferred from
        the `.shape` usage below.
    :param feature_pairs: forwarded unchanged to the parent constructor.
    :param window_size: interval length; defaults to the total number of
        graphs when None.
    """
    self._interval = int(
        graphs.number_of_graphs()) if window_size is None else window_size
    self._all_features = []
    self._dict_mx = dict_mx
    for graph in graphs.graph_names():
        m = self._dict_mx[graph]
        # self._nodes_for_graph.append(m.shape[0])
        # append graph features
        self._all_features.append(m)
        # append 0.001 for all missing nodes
        # NOTE(review): pads the matrix so every graph contributes exactly
        # node_count rows; assumes node_count(index) >= m.shape[0] — a graph
        # with more feature rows than nodes would make np.ones raise. Confirm
        # against the MultiGraph API.
        self._all_features.append(
            np.ones((graphs.node_count(graphs.name_to_index(graph)) - m.shape[0], m.shape[1])) * 0.001)
    # create one big matrix of everything - rows: nodes, columns: features
    self._all_features = np.concatenate(self._all_features)
    # all_ftr_graph_index - [ .... last_row_index_for_graph_i ... ]
    # NOTE(review): node_count() with no argument is assumed to return the
    # per-graph node counts as a list (it is called with an index above) —
    # verify against MultiGraph.node_count's signature.
    self._all_ftr_graph_index = np.cumsum([0] + graphs.node_count()).tolist()
    super(LinearContext, self).__init__(graphs, feature_pairs)
def _build_multi_graphs(self):
    """Create one MultiGraph per time step, each with its sub-graphs sorted.

    :return: dict mapping each time key in ``self._edge_list_dict`` to a
        MultiGraph built from that time's edge-list dictionary and ordered
        via ``self._func_order`` (FIFO ordering of sub-graphs).
    """
    def _make_sorted(when, snapshot_edges):
        # one sorted multi-graph for a single time step
        graph = MultiGraph(when, graphs_source=snapshot_edges,
                           directed=self._directed)
        graph.sort_by(self._func_order)
        return graph

    return {when: _make_sorted(when, edges)
            for when, edges in self._edge_list_dict.items()}
def create_multigraph(self, mg_dict):
    """Build the "dblp" MultiGraph from an edge-list dict and cache it to disk.

    :param mg_dict: graphs_source mapping handed straight to MultiGraph.
    :return: the constructed (and sorted) MultiGraph.
    """
    mg = MultiGraph("dblp", graphs_source=mg_dict)
    mg.sort_by(self.sort_gnx)
    # the logger is suspended so it isn't captured inside the pickle
    mg.suspend_logger()
    check("./dataset/" + str(self.dataset_name) + "/pkl/")
    # fix: the original passed an anonymous open(...) to pickle.dump and
    # leaked the file handle — use a context manager so it is closed/flushed.
    with open("./dataset/" + str(self.dataset_name) + "/pkl/mg_" + self.dataset_name + ".pkl", "wb") as pkl_file:
        pickle.dump(mg, pkl_file)
    mg.wake_logger()
    return mg
def _build_multi_graph(self):
    """Build the dataset MultiGraph and its label mappings, with a pickle cache.

    Reads the source CSV up to PERCENTAGE of its rows, groups edges by graph
    id, and assigns a dense integer index to each label in first-seen order.

    :return: tuple ``(mg, labels, label_to_idx, idx_to_label)`` where
        ``labels`` maps graph_id -> label index, ``label_to_idx`` maps
        label -> index and ``idx_to_label`` is the inverse list.
    """
    path_pkl = os.path.join(
        self._base_dir, PKL_DIR,
        self._params.DATASET_NAME + "_split_" + str(self._params.PERCENTAGE) + "_mg.pkl")
    if os.path.exists(path_pkl):
        # fix: close the file handle (original leaked an anonymous open()).
        # Security note: pickle.load must only ever see this trusted cache.
        with open(path_pkl, "rb") as pkl_file:
            return pickle.load(pkl_file)
    multi_graph_dict = {}
    labels = {}
    label_to_idx = {}
    # open basic data csv (with all edges of all times)
    data_df = pd.read_csv(self._src_file_path)
    stop = data_df.shape[0] * self._params.PERCENTAGE
    for index, edge in data_df.iterrows():
        if index > stop:
            break
        # write edge to dictionary
        graph_id = str(edge[self._params.GRAPH_NAME_COL])
        src = str(edge[self._params.SRC_COL])
        dst = str(edge[self._params.DST_COL])
        # fix: append in place — the original rebuilt the list on every edge
        # (d[k] = d.get(k, []) + [e]), which is O(n^2) per graph.
        multi_graph_dict.setdefault(graph_id, []).append((src, dst))
        label = edge[self._params.LABEL_COL]
        # first-seen labels get the next dense index; existing ones are kept
        label_to_idx.setdefault(label, len(label_to_idx))
        labels[graph_id] = label_to_idx[label]
    mg = MultiGraph(self._params.DATASET_NAME,
                    graphs_source=multi_graph_dict,
                    directed=self._params.DIRECTED,
                    logger=self._logger)
    # inverse mapping: position i holds the label whose index is i
    idx_to_label = sorted(label_to_idx, key=lambda x: label_to_idx[x])
    # suspend the logger so it isn't captured inside the pickle
    mg.suspend_logger()
    # fix: close the file handle (original leaked an anonymous open()).
    with open(path_pkl, "wb") as pkl_file:
        pickle.dump((mg, labels, label_to_idx, idx_to_label), pkl_file)
    mg.wake_logger()
    return mg, labels, label_to_idx, idx_to_label
def multi_graph_by_window(self, window_size=None, start_time=0):
    """Generator yielding a deep-copied MultiGraph per time step, maintained
    as a sliding window of at most ``window_size`` time intervals.

    :param window_size: number of time intervals kept in the window; falsy
        values (None or 0) fall back to the total number of intervals,
        i.e. an ever-growing window.
    :param start_time: index of the first interval to yield from.
    """
    # NOTE(review): the guard allows start_time == self._number_of_times,
    # which makes the yield loop below empty — confirm whether `>=` was
    # intended here.
    if start_time < 0 or start_time > self._number_of_times:
        self._logger.error("invalid start time = " + str(start_time) + ", total intervals = " + str(self._number_of_times))
        return
    # build base mg
    # seed with interval 0's edges, then fold in intervals 1..start_time-1,
    # so the first yielded copy covers intervals [0, start_time - 1].
    mg = MultiGraph(self._database_name + "window", graphs_source=self._edge_list_dict[self._times[0]], directed=self._directed)
    for i in range(1, start_time):
        mg.add_edges(self._edge_list_dict[self._times[i]])
    window_size = window_size if window_size else self._number_of_times
    for i in range(start_time, self._number_of_times):
        # suspend the logger so the deep copy doesn't duplicate it
        mg.suspend_logger()
        temp = copy.deepcopy(mg)
        mg.wake_logger()
        # NOTE(review): the yield happens BEFORE interval i's edges are
        # added, so iteration i exposes intervals up to i-1 only — confirm
        # this off-by-one is the intended semantics.
        yield temp
        # slide the window: drop the interval that just fell out of range
        to_remove = i - window_size
        if to_remove >= 0:
            mg.remove_edges(self._edge_list_dict[self._times[to_remove]])
        mg.add_edges(self._edge_list_dict[self._times[i]])
def to_multi_graph(self):
    """Package the accumulated edge dictionary into one MultiGraph whose
    sub-graphs are ordered chronologically by parsing their names as
    timestamps in ``self._format_out``.

    :return: the sorted MultiGraph.
    """
    fmt = self._format_out
    combined = MultiGraph(self._database_name,
                          graphs_source=self._mg_dictionary,
                          directed=self._directed)
    # chronological order: each sub-graph name parses back into a datetime
    combined.sort_by(lambda name: datetime.strptime(name, fmt))
    return combined