def forward(self, graph, return_dict=False):
    """Run the LINE algorithm on ``graph``.

    Trains 1st-order (self.order == 1), 2nd-order (== 2), or both
    concatenated (== 3) embeddings.

    Args:
        graph: project graph object exposing ``to_networkx()`` and
            ``num_nodes`` (assumed node ids are 0..num_nodes-1 ints in the
            non-dict branch — TODO confirm against caller).
        return_dict: if True, return {node: embedding-row}; otherwise a
            dense (num_nodes, dim) numpy array.

    Returns:
        dict or np.ndarray of node embeddings, L2-normalized per row.
    """
    nx_g = graph.to_networkx()
    self.G = nx_g
    self.is_directed = nx.is_directed(self.G)
    self.num_node = nx_g.number_of_nodes()
    self.num_edge = nx_g.number_of_edges()
    self.num_sampling_edge = self.walk_length * self.walk_num * self.num_node

    node2id = {node: vid for vid, node in enumerate(nx_g.nodes())}
    self.edges = [[node2id[e[0]], node2id[e[1]]] for e in self.G.edges()]

    # Edge-sampling distribution proportional to edge weight (default 1.0).
    self.edges_prob = np.asarray([nx_g[u][v].get("weight", 1.0) for u, v in nx_g.edges()])
    self.edges_prob /= np.sum(self.edges_prob)
    self.edges_table, self.edges_prob = alias_setup(self.edges_prob)

    # FIX: use a float array. The original int array (np.asarray([0] * n))
    # silently truncated fractional edge weights on accumulation.
    degree_weight = np.zeros(self.num_node)
    for u, v in nx_g.edges():
        degree_weight[node2id[u]] += nx_g[u][v].get("weight", 1.0)
        if not self.is_directed:
            degree_weight[node2id[v]] += nx_g[u][v].get("weight", 1.0)
    # Negative-sampling distribution: degree^0.75, as in the LINE paper.
    self.node_prob = np.power(degree_weight, 0.75)
    self.node_prob /= np.sum(self.node_prob)
    self.node_table, self.node_prob = alias_setup(self.node_prob)

    if self.order == 3:
        # Each half of the final concatenated embedding gets dim/2.
        self.dimension = int(self.dimension / 2)
    if self.order == 1 or self.order == 3:
        print("train line with 1-order")
        print(type(self.dimension))
        self.emb_vertex = (np.random.random((self.num_node, self.dimension)) - 0.5) / self.dimension
        self._train_line(order=1)
        embedding1 = preprocessing.normalize(self.emb_vertex, "l2")
    if self.order == 2 or self.order == 3:
        print("train line with 2-order")
        self.emb_vertex = (np.random.random((self.num_node, self.dimension)) - 0.5) / self.dimension
        # NOTE(review): this aliases (does not copy) emb_vertex — _train_line
        # presumably updates both views; confirm this sharing is intended.
        self.emb_context = self.emb_vertex
        self._train_line(order=2)
        embedding2 = preprocessing.normalize(self.emb_vertex, "l2")

    if self.order == 1:
        embeddings = embedding1
    elif self.order == 2:
        embeddings = embedding2
    else:
        print("concatenate two embedding...")
        embeddings = np.hstack((embedding1, embedding2))

    if return_dict:
        features_matrix = dict()
        for vid, node in enumerate(nx_g.nodes()):
            features_matrix[node] = embeddings[vid]
    else:
        features_matrix = np.zeros((graph.num_nodes, embeddings.shape[1]))
        nx_nodes = nx_g.nodes()
        features_matrix[nx_nodes] = embeddings[np.arange(graph.num_nodes)]
    return features_matrix
def forward(self, data):
    """Train PTE-style 2nd-order embeddings on a heterogeneous graph.

    Args:
        data: object with ``edge_index`` (2 x E tensor of endpoints) and
            ``pos`` (per-node type labels; converted via .tolist()).

    Returns:
        np.ndarray of shape (num_node, dimension), L2-normalized rows.
    """
    G = nx.DiGraph()
    row, col = data.edge_index
    G.add_edges_from(list(zip(row.numpy(), col.numpy())))
    self.G = G
    self.node_type = data.pos.tolist()
    self.num_node = G.number_of_nodes()
    self.num_edge = G.number_of_edges()
    self.num_sampling_edge = self.walk_length * self.walk_num * self.num_node
    self.num_node_type = len(set(self.node_type))

    # NOTE(review): self.edges is pre-sized to num_node_type slots but later
    # indexed by subgraph count (one per type *pair* with edges), which can
    # exceed num_node_type when there are >= 4 node types — verify upstream.
    self.edges, self.edges_prob = [[] for _ in range(self.num_node_type)], []
    self.node_prob, self.id2node = [], [dict() for _ in range(self.num_node_type)]

    # One bipartite subgraph per unordered pair of node types that share edges.
    subgraphs = []
    for i in range(self.num_node_type):
        for j in range(i + 1, self.num_node_type):
            context_node = [nid for nid, ntype in enumerate(self.node_type) if ntype == i or ntype == j]
            sub_graph = self.G.subgraph(context_node)
            if sub_graph.number_of_edges() != 0:
                subgraphs.append(sub_graph)
    self.num_graph = len(subgraphs)
    print("number of subgraph", self.num_graph)

    for i in range(self.num_graph):
        self.edges[i] = [[e[0], e[1]] for e in subgraphs[i].edges()]
        # Edge-sampling distribution proportional to edge weight (default 1.0).
        edges_prob = np.asarray([subgraphs[i][u][v].get("weight", 1.0) for u, v in self.edges[i]])
        edges_prob /= np.sum(edges_prob)
        edges_table_prob = alias_setup(edges_prob)
        self.edges_prob.append(edges_table_prob)

        context_node = subgraphs[i].nodes()
        self.id2node[i] = dict(zip(range(len(context_node)), context_node))
        node2id = dict(zip(context_node, range(len(context_node))))

        # FIX: use a float array. The original int array (np.asarray([0] * n))
        # silently truncated fractional edge weights on accumulation.
        degree_weight = np.zeros(len(context_node))
        for u in context_node:
            for v in list(subgraphs[i].neighbors(u)):
                degree_weight[node2id[u]] += subgraphs[i][u][v].get("weight", 1.0)
        # Negative-sampling distribution: degree^0.75, as in LINE/PTE.
        node_prob = np.power(degree_weight, 0.75)
        node_prob /= np.sum(node_prob)
        nodes_table_prob = alias_setup(node_prob)
        self.node_prob.append(nodes_table_prob)

    print("train pte with 2-order")
    self.emb_vertex = (np.random.random((self.num_node, self.dimension)) - 0.5) / self.dimension
    # NOTE(review): aliases (does not copy) emb_vertex — confirm intended.
    self.emb_context = self.emb_vertex
    self._train_line()
    embedding = preprocessing.normalize(self.emb_vertex, "l2")
    return embedding
def train(self, G):
    """Run the LINE algorithm on networkx graph ``G``.

    Trains 1st-order (self.order == 1), 2nd-order (== 2), or both
    concatenated (== 3) embeddings; stores and returns ``self.embeddings``
    (np.ndarray, one L2-normalized row per node in G.nodes() order).
    """
    self.G = G
    self.is_directed = nx.is_directed(self.G)
    self.num_node = G.number_of_nodes()
    self.num_edge = G.number_of_edges()
    self.num_sampling_edge = self.walk_length * self.walk_num * self.num_node

    node2id = {node: vid for vid, node in enumerate(G.nodes())}
    self.edges = [[node2id[e[0]], node2id[e[1]]] for e in self.G.edges()]

    # Edge-sampling distribution proportional to edge weight (default 1.0).
    self.edges_prob = np.asarray([G[u][v].get("weight", 1.0) for u, v in G.edges()])
    self.edges_prob /= np.sum(self.edges_prob)
    self.edges_table, self.edges_prob = alias_setup(self.edges_prob)

    # FIX: use a float array. The original int array (np.asarray([0] * n))
    # silently truncated fractional edge weights on accumulation.
    degree_weight = np.zeros(self.num_node)
    for u, v in G.edges():
        degree_weight[node2id[u]] += G[u][v].get("weight", 1.0)
        if not self.is_directed:
            degree_weight[node2id[v]] += G[u][v].get("weight", 1.0)
    # Negative-sampling distribution: degree^0.75, as in the LINE paper.
    self.node_prob = np.power(degree_weight, 0.75)
    self.node_prob /= np.sum(self.node_prob)
    self.node_table, self.node_prob = alias_setup(self.node_prob)

    if self.order == 3:
        # Each half of the final concatenated embedding gets dim/2.
        self.dimension = int(self.dimension / 2)
    if self.order == 1 or self.order == 3:
        print("train line with 1-order")
        print(type(self.dimension))
        self.emb_vertex = (np.random.random((self.num_node, self.dimension)) - 0.5) / self.dimension
        self._train_line(order=1)
        embedding1 = preprocessing.normalize(self.emb_vertex, "l2")
    if self.order == 2 or self.order == 3:
        print("train line with 2-order")
        self.emb_vertex = (np.random.random((self.num_node, self.dimension)) - 0.5) / self.dimension
        # NOTE(review): aliases (does not copy) emb_vertex — confirm intended.
        self.emb_context = self.emb_vertex
        self._train_line(order=2)
        embedding2 = preprocessing.normalize(self.emb_vertex, "l2")

    if self.order == 1:
        self.embeddings = embedding1
    elif self.order == 2:
        self.embeddings = embedding2
    else:
        print("concatenate two embedding...")
        self.embeddings = np.hstack((embedding1, embedding2))
    return self.embeddings
def _get_alias_edge(self, src, dst):
    """Build the alias-sampling table for transitions out of ``dst``
    given that the walk arrived from ``src`` (node2vec 2nd-order bias).

    Each neighbor of ``dst`` is weighted by its edge weight, scaled by
    1/p when it returns to ``src``, 1 when it is also adjacent to ``src``,
    and 1/q otherwise.

    Returns:
        The (table, prob) pair produced by ``alias_setup``.
    """
    G = self.G
    unnormalized_probs = []
    # FIX: use .get("weight", 1.0) instead of ["weight"] — the original
    # raised KeyError on unweighted graphs; every other method in this
    # file already defaults missing weights to 1.0.
    for dst_nbr in G.neighbors(dst):
        if dst_nbr == src:
            # Return to the previous node: damped by the return parameter p.
            unnormalized_probs.append(G[dst][dst_nbr].get("weight", 1.0) / self.p)
        elif G.has_edge(dst_nbr, src):
            # Neighbor shared with src: distance 1, unbiased.
            unnormalized_probs.append(G[dst][dst_nbr].get("weight", 1.0))
        else:
            # Outward move: damped by the in-out parameter q.
            unnormalized_probs.append(G[dst][dst_nbr].get("weight", 1.0) / self.q)
    norm_const = sum(unnormalized_probs)
    normalized_probs = [
        float(u_prob) / norm_const for u_prob in unnormalized_probs
    ]
    return alias_setup(normalized_probs)
def _preprocess_transition_probs(self):
    """Precompute alias tables guiding the biased random walks.

    Populates ``self.alias_nodes`` (per-node 1st-order tables, weights
    normalized over each node's neighbors) and ``self.alias_edges``
    (per-edge 2nd-order tables via ``_get_alias_edge``; for undirected
    graphs both orientations are stored).
    """
    G = self.G
    is_directed = nx.is_directed(self.G)
    print(len(list(G.nodes())))
    print(len(list(G.edges())))

    s = time.time()
    alias_nodes = {}
    for node in G.nodes():
        # FIX: .get("weight", 1.0) instead of ["weight"] — the original
        # raised KeyError on unweighted graphs, unlike the rest of the file.
        unnormalized_probs = [
            G[node][nbr].get("weight", 1.0) for nbr in G.neighbors(node)
        ]
        norm_const = sum(unnormalized_probs)
        normalized_probs = [
            float(u_prob) / norm_const for u_prob in unnormalized_probs
        ]
        alias_nodes[node] = alias_setup(normalized_probs)
    t = time.time()
    print("alias_nodes", t - s)

    alias_edges = {}
    s = time.time()
    # Deduplicated: forward direction is always computed; undirected graphs
    # additionally get the reverse orientation (same behavior as before).
    for edge in G.edges():
        alias_edges[edge] = self._get_alias_edge(edge[0], edge[1])
        if not is_directed:
            alias_edges[(edge[1], edge[0])] = self._get_alias_edge(edge[1], edge[0])
    t = time.time()
    print("alias_edges", t - s)

    self.alias_nodes = alias_nodes
    self.alias_edges = alias_edges
    return
def train(self, G):
    """Run NetSMF on networkx graph ``G``: sample path counts with parallel
    random walks, build a sparsified NetMF-style matrix, and factorize it
    with randomized SVD (via ``self._get_embedding_rand``).

    Returns the embedding produced by ``_get_embedding_rand`` — presumably
    an array with one row per node; confirm against that helper.
    """
    self.G = G
    node2id = dict([(node, vid) for vid, node in enumerate(G.nodes())])
    self.is_directed = nx.is_directed(self.G)
    self.num_node = self.G.number_of_nodes()
    self.num_edge = G.number_of_edges()
    self.edges = [[node2id[e[0]], node2id[e[1]]] for e in self.G.edges()]

    id2node = dict(zip(node2id.values(), node2id.keys()))
    # Neighbor counts and id-space adjacency lists, indexed by node id.
    self.num_neigh = np.asarray([len(list(self.G.neighbors(id2node[i]))) for i in range(self.num_node)])
    self.neighbors = [[node2id[v] for v in self.G.neighbors(id2node[i])] for i in range(self.num_node)]

    s = time.time()
    # Per-node alias tables (weights default to 1.0) plus raw neighbor
    # weights, both keyed by integer node id for the walk workers.
    self.alias_nodes = {}
    self.node_weight = {}
    for i in range(self.num_node):
        unnormalized_probs = [G[id2node[i]][nbr].get("weight", 1.0) for nbr in G.neighbors(id2node[i])]
        norm_const = sum(unnormalized_probs)
        normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]
        self.alias_nodes[i] = alias_setup(normalized_probs)
        self.node_weight[i] = dict(
            zip(
                [node2id[nbr] for nbr in G.neighbors(id2node[i])],
                unnormalized_probs,
            )
        )
    t = time.time()
    print("alias_nodes", t - s)

    # run netsmf algorithm with multiprocessing and apply randomized svd
    print("number of sample edges ", self.num_round * self.num_edge * self.window_size)
    print("random walk start...")
    t0 = time.time()
    results = []
    # Fan the sampling out to self.worker processes; each worker returns a
    # sparse co-occurrence matrix (see self._random_walk_matrix).
    pool = Pool(processes=self.worker)
    for i in range(self.worker):
        results.append(pool.apply_async(func=self._random_walk_matrix, args=(i,)))
    pool.close()
    pool.join()
    print("random walk time", time.time() - t0)

    matrix = sp.csr_matrix((self.num_node, self.num_node))
    A = sp.csr_matrix(nx.adjacency_matrix(self.G))
    # Diagonal degree matrix from column sums of A, and its inverse.
    # NOTE(review): degree.power(-1) yields inf for isolated nodes — confirm
    # the input graph has no degree-0 nodes.
    degree = sp.diags(np.array(A.sum(axis=0))[0], format="csr")
    degree_inv = degree.power(-1)

    t1 = time.time()
    # Sum the per-worker walk matrices into one.
    for res in results:
        matrix += res.get()
    t2 = time.time()
    print("construct random walk matrix time", time.time() - t1)

    L = sp.csgraph.laplacian(matrix, normed=False, return_diag=False)
    # M = D^-1 (D - L) D^-1, scaled by total edge weight over the number of
    # negative samples, then truncated log (entries <= 1 clamped to 1 so the
    # log is 0 and eliminate_zeros() drops them — the sparsifier step).
    M = degree_inv.dot(degree - L).dot(degree_inv)
    M = M * A.sum() / self.negative
    M.data[M.data <= 1] = 1
    M.data = np.log(M.data)
    M.eliminate_zeros()
    print("number of nzz", M.nnz)
    print("construct matrix sparsifier time", time.time() - t2)

    embedding = self._get_embedding_rand(M)
    return embedding