def _spacey_metagraph_only_random_walk(self, root_node):
    """Produce a single walk from ``root_node`` constrained by the metagraph.

    At every step the walker may first jump back to the root (with
    probability ``self._walk_restart``), then picks the next node *type*
    among the metagraph successors of the current type that the current
    node actually has neighbours of.  With probability ``self._alpha`` the
    type is chosen using the per-type ``history`` counts as weights
    (``utils.unigram_sample``); otherwise it is chosen uniformly.  The next
    node is then drawn uniformly from the neighbours of that type.

    Args:
        root_node: node the walk starts from (also the restart target).

    Returns:
        A list of visited nodes converted with ``str``, beginning with the
        root.  An empty list is returned when the root's type is not in
        ``self._metagraph``.  The walk stops early when no admissible next
        type exists.

    Note:
        ``history`` is only bound for ``_history_position`` values
        ``"local"`` and ``"global"``; with ``"global"`` the shared
        ``self._history`` array is updated in place.
    """
    start_type = self._net.get_node_type(root_node)
    if start_type not in self._metagraph:
        return []

    if self._history_position == "local":
        history = np.ones([self.node_types_size], dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    node, node_type = root_node, start_type
    walk = [str(node)]
    for _step in range(self._walk_length):
        # Optional restart, then decide whether this step is history-guided.
        if random.random() < self._walk_restart:
            node, node_type = root_node, start_type
        history_guided = random.random() < self._alpha

        # Types allowed by the metagraph that this node really links to.
        reachable_types = self._adj_lookupdict[node].keys()
        candidates = [
            t for t in self._metagraph[node_type] if t in reachable_types
        ]
        if not candidates:
            break

        if not history_guided:
            node_type = random.choice(candidates)
        elif len(candidates) == 1:
            node_type = candidates[0]
        else:
            node_type = utils.unigram_sample(population=candidates,
                                             size=1,
                                             weight=history[candidates])[0]
        node = random.choice(self._adj_lookupdict[node][node_type])
        history[node_type] += 1
        walk.append(str(node))
    return walk
def _spacey_metaschema_only_random_walk(self, root_node, file_vector,
                                        file_dist, window_size):
    """Run one long walk and record how the visit distribution converges.

    The walker moves over every node type adjacent to the current node (no
    metagraph restriction).  With probability ``self._alpha`` the next type
    is sampled with the ``history`` counts as weights; otherwise it is
    picked uniformly.  A node without neighbours sends the walker back to
    the root.

    Every ``window_size`` steps the normalised visit counts are compared
    with the previous snapshot and one line ``"<JS> <EUC>"`` is written to
    ``file_dist``: the Jensen-Shannon divergence (mean of the two
    ``scipy.stats.entropy`` terms against the mixture) and the Euclidean
    distance.  When the walk ends the raw visit counts are saved to
    ``file_vector`` with ``np.savetxt``.

    Args:
        root_node: start node; used to index the visit-count vector.
        file_vector: output path for the final per-node visit counts.
        file_dist: output path for the per-window distance log.
        window_size: number of steps between two distance measurements.

    Returns:
        None.

    Note:
        ``history`` is only bound for ``_history_position`` values
        ``"local"`` and ``"global"``; with ``"global"`` the shared
        ``self._history`` array is updated in place.
    """
    root_type = self._net.get_node_type(root_node)
    if self._history_position == "local":
        history = np.ones([self.node_types_size], dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    occur_count_vector = np.zeros(self.nodes_size, dtype=np.int32)
    occur_count_vector[root_node] = 1
    last_distribution = occur_count_vector / np.sum(occur_count_vector)

    cur_node = root_node
    cur_type = root_type
    # The context manager closes the log even if a step raises; the
    # original only closed it on the success path.
    with open(file_dist, "w") as fr_dist:
        for wl_cnt in range(1, self._walk_length + 1):
            next_type_list = list(self._adj_lookupdict[cur_node].keys())
            use_history = random.random() < self._alpha
            if not next_type_list:
                # Dead end: jump back to the root.
                cur_type = root_type
                cur_node = root_node
            else:
                if not use_history:
                    cur_type = random.choice(next_type_list)
                elif len(next_type_list) == 1:
                    cur_type = next_type_list[0]
                else:
                    occupancy = history[next_type_list]
                    cur_type = utils.unigram_sample(
                        population=next_type_list, size=1,
                        weight=occupancy)[0]
                cur_node = random.choice(
                    self._adj_lookupdict[cur_node][cur_type])
            history[cur_type] += 1
            occur_count_vector[cur_node] += 1

            if wl_cnt % window_size == 0:
                cur_distribution = occur_count_vector / np.sum(
                    occur_count_vector)
                mix_distribution = (cur_distribution +
                                    last_distribution) / 2
                js_dist = (
                    scipy.stats.entropy(cur_distribution, mix_distribution)
                    + scipy.stats.entropy(last_distribution,
                                          mix_distribution)) / 2
                euc_dist = np.sqrt(
                    np.sum(np.square(cur_distribution -
                                     last_distribution)))
                fr_dist.write("{} {}\n".format(js_dist, euc_dist))
                # Flush per window so progress can be followed on disk.
                fr_dist.flush()
                last_distribution = cur_distribution

    np.savetxt(file_vector, occur_count_vector, fmt="%d")
    return
def _spacey_metatree_only_random_walk(self, root_node, file_vector,
                                      file_dist, window_size):
    """Run one long metatree-guided walk and log its convergence.

    The walker tracks, besides the current node and its type, a position
    ``cur_id`` inside ``self._metagraph`` (whose vertices carry a
    ``"type"`` attribute).  Candidate next ids are the successors of
    ``cur_id`` whose type the current node has neighbours of.  With
    probability ``self._alpha`` the id is chosen using ``history`` as
    weights and, after the move, ``cur_id`` is resampled among all ids of
    the new type ("spacey out"); otherwise the id is chosen uniformly.  A
    step without candidates sends the walker back to the root.

    Every ``window_size`` steps one line ``"<JS> <EUC>"`` is written to
    ``file_dist`` comparing the current normalised visit counts with the
    previous snapshot; the final raw counts go to ``file_vector``.

    Args:
        root_node: start node; used to index the visit-count vector.
        file_vector: output path for the final per-node visit counts.
        file_dist: output path for the per-window distance log.
        window_size: number of steps between two distance measurements.

    Returns:
        ``[]`` when the root's type is not in
        ``self._metatree_type_id_dict`` (nothing is written); otherwise
        None.

    Note:
        ``history`` is only bound for ``_history_position`` values
        ``"local"`` and ``"global"``.
        NOTE(review): a second definition with this same name but taking
        only ``root_node`` appears later in this file; if both live in the
        same class the later one replaces this one -- confirm which is
        intended.
    """
    root_type = self._net.get_node_type(root_node)
    if root_type not in self._metatree_type_id_dict:
        return []
    root_id = random.choice(self._metatree_type_id_dict[root_type])

    if self._history_position == "local":
        history = np.ones([len(self._metagraph.nodes())],
                          dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    occur_count_vector = np.zeros(self.nodes_size, dtype=np.int32)
    occur_count_vector[root_node] = 1
    last_distribution = occur_count_vector / np.sum(occur_count_vector)

    cur_node = root_node
    cur_type = root_type
    cur_id = root_id
    # The context manager closes the log even if a step raises; the
    # original only closed it on the success path.
    with open(file_dist, "w") as fr_dist:
        for wl_cnt in range(1, self._walk_length + 1):
            use_history = random.random() < self._alpha
            cur_node_adj_typelist = self._adj_lookupdict[cur_node].keys()
            next_id_list = [
                v for v in self._metagraph[cur_id]
                if self._metagraph.nodes[v]["type"] in cur_node_adj_typelist
            ]
            if not next_id_list:
                # Dead end: jump back to the root.
                cur_node = root_node
                cur_type = root_type
                cur_id = root_id
            else:
                if not use_history:
                    cur_id = random.choice(next_id_list)
                elif len(next_id_list) == 1:
                    cur_id = next_id_list[0]
                else:
                    occupancy = history[next_id_list]
                    cur_id = utils.unigram_sample(population=next_id_list,
                                                  size=1,
                                                  weight=occupancy)[0]
                cur_type = self._metagraph.nodes[cur_id]["type"]
                cur_node = random.choice(
                    self._adj_lookupdict[cur_node][cur_type])
            history[cur_id] += 1
            if use_history:
                # spacey out: re-pick the position among ids of this type
                same_type_ids = self._metatree_type_id_dict[cur_type]
                cur_id = utils.unigram_sample(
                    population=same_type_ids, size=1,
                    weight=history[same_type_ids])[0]
            occur_count_vector[cur_node] += 1

            if wl_cnt % window_size == 0:
                cur_distribution = occur_count_vector / np.sum(
                    occur_count_vector)
                mix_distribution = (cur_distribution +
                                    last_distribution) / 2
                js_dist = (
                    scipy.stats.entropy(cur_distribution, mix_distribution)
                    + scipy.stats.entropy(last_distribution,
                                          mix_distribution)) / 2
                euc_dist = np.sqrt(
                    np.sum(np.square(cur_distribution -
                                     last_distribution)))
                fr_dist.write("{} {}\n".format(js_dist, euc_dist))
                # Flush per window so progress can be followed on disk.
                fr_dist.flush()
                last_distribution = cur_distribution

    np.savetxt(file_vector, occur_count_vector, fmt="%d")
    return
def _spacey_metatree_random_walk(self, root_node, walk_times=0):
    """Collect per-type context and negative nodes via metatree walks.

    ``walk_times`` walks of ``self._walk_length`` steps are started from the
    root.  Each step optionally restarts at the root (probability
    ``self._walk_restart``), moves to a metagraph successor id whose type
    the current node has neighbours of (history-weighted when several
    exist), and then resamples the metatree position among all ids of the
    new type.  Every visited node is recorded as a context node of its
    type.

    If the root's type is not in ``self._metatree_type_id_dict`` a random
    known type and a random node taken from
    ``self._nodes_type_dict[type][0]`` replace the given root.

    Args:
        root_node: requested start node.
        walk_times: number of walks to run (default 0, i.e. no walking).

    Returns:
        Tuple ``(root_node, type_context_nodes_list, type_mask_list,
        type_neg_nodes_list)`` with one entry per type index in
        ``range(self.node_types_size)``.  Types never visited get mask
        ``0`` and placeholder lists ``[0]``; visited types get mask ``1``,
        their context nodes, and the result of ``utils.neg_sample``
        (presumably negatives drawn outside the except-set -- verify
        against ``utils``).
    """
    start_type = self._net.get_node_type(root_node)
    if start_type not in self._metatree_type_id_dict:
        start_type = random.choice(list(self._metatree_type_id_dict.keys()))
        root_node = random.choice(self._nodes_type_dict[start_type][0])
    start_id = random.choice(self._metatree_type_id_dict[start_type])

    # type -> [context node list, set of nodes to keep out of negatives]
    contexts = {}

    if self._history_position == "local":
        history = np.ones([len(self._metagraph.nodes())], dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    for _walk in range(walk_times):
        if self._history_position == "local_walktime":
            # fresh history for every individual walk
            history = np.ones([len(self._metagraph.nodes())],
                              dtype=np.float64)
        node, node_type, tree_id = root_node, start_type, start_id
        for _step in range(self._walk_length):
            if random.random() < self._walk_restart:
                node, node_type, tree_id = root_node, start_type, start_id

            reachable_types = self._adj_lookupdict[node].keys()
            candidates = [
                i for i in self._metagraph[tree_id]
                if self._metagraph.nodes[i]["type"] in reachable_types
            ]
            if candidates:
                if len(candidates) == 1:
                    tree_id = candidates[0]
                else:
                    tree_id = utils.unigram_sample(
                        population=candidates, size=1,
                        weight=history[candidates])[0]
                node_type = self._metagraph.nodes[tree_id]["type"]
                node = random.choice(self._adj_lookupdict[node][node_type])
                history[tree_id] += 1
            else:
                # dead end: fall back to the root
                node_type, node, tree_id = start_type, root_node, start_id

            # re-pick the metatree position among ids of the current type
            same_type_ids = self._metatree_type_id_dict[node_type]
            tree_id = utils.unigram_sample(population=same_type_ids,
                                           size=1,
                                           weight=history[same_type_ids])[0]

            bucket = contexts.setdefault(node_type, [[], {root_node}])
            bucket[0].append(node)
            bucket[1].add(node)

    type_context_nodes_list = []
    type_neg_nodes_list = []
    type_mask_list = []
    for k in range(self.node_types_size):
        bucket = contexts.get(k)
        if bucket is None:
            type_mask_list.append(0)
            type_context_nodes_list.append([0])
            type_neg_nodes_list.append([0])
            continue
        context_nodes, except_set = bucket
        type_mask_list.append(1)
        type_context_nodes_list.append(context_nodes)
        type_neg_nodes_list.append(
            utils.neg_sample(self._nodes_type_dict[k][0],
                             except_set,
                             num=self._neg_sampled,
                             alias_table=self._nodes_type_dict[k][1]))
    return (root_node, type_context_nodes_list, type_mask_list,
            type_neg_nodes_list)
def _spacey_metaschema_random_walk(self, root_node, walk_times=0):
    """Collect per-type context and negative nodes via schema-level walks.

    ``walk_times`` walks of ``self._walk_length`` steps are started from
    ``root_node``.  Each step optionally restarts at the root (probability
    ``self._walk_restart``) and then moves to a neighbour whose type is
    chosen among all types adjacent to the current node: directly when
    there is one, weighted by ``history`` when there are several.  A node
    without neighbours sends the walker back to the root.  Every visited
    node is recorded as a context node of its type.

    Args:
        root_node: start node of every walk.
        walk_times: number of walks to run (default 0, i.e. no walking).

    Returns:
        Tuple ``(root_node, type_context_nodes_list, type_mask_list,
        type_neg_nodes_list)`` with one entry per type index in
        ``range(self.node_types_size)``.  Types never visited get mask
        ``0`` and placeholder lists ``[0]``; visited types get mask ``1``,
        their context nodes, and the result of ``utils.neg_sample``
        (presumably negatives drawn outside the except-set -- verify
        against ``utils``).
    """
    start_type = self._net.get_node_type(root_node)

    # type -> [context node list, set of nodes to keep out of negatives]
    contexts = {}

    if self._history_position == "local":
        history = np.ones([self.node_types_size], dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    for _walk in range(walk_times):
        if self._history_position == "local_walktime":
            # fresh history for every individual walk
            history = np.ones([self.node_types_size], dtype=np.float64)
        node, node_type = root_node, start_type
        for _step in range(self._walk_length):
            if random.random() < self._walk_restart:
                node, node_type = root_node, start_type

            candidates = list(self._adj_lookupdict[node].keys())
            if candidates:
                if len(candidates) == 1:
                    node_type = candidates[0]
                else:
                    node_type = utils.unigram_sample(
                        population=candidates, size=1,
                        weight=history[candidates])[0]
                node = random.choice(self._adj_lookupdict[node][node_type])
                history[node_type] += 1
            else:
                # dead end: fall back to the root
                node_type, node = start_type, root_node

            bucket = contexts.setdefault(node_type, [[], {root_node}])
            bucket[0].append(node)
            bucket[1].add(node)

    type_context_nodes_list = []
    type_neg_nodes_list = []
    type_mask_list = []
    for k in range(self.node_types_size):
        bucket = contexts.get(k)
        if bucket is None:
            type_mask_list.append(0)
            type_context_nodes_list.append([0])
            type_neg_nodes_list.append([0])
            continue
        context_nodes, except_set = bucket
        type_mask_list.append(1)
        type_context_nodes_list.append(context_nodes)
        type_neg_nodes_list.append(
            utils.neg_sample(self._nodes_type_dict[k][0],
                             except_set,
                             num=self._neg_sampled,
                             alias_table=self._nodes_type_dict[k][1]))
    return (root_node, type_context_nodes_list, type_mask_list,
            type_neg_nodes_list)
def _spacey_metatree_only_random_walk(self, root_node):
    """Produce a single metatree-guided walk starting at ``root_node``.

    Besides the current node and type the walker tracks a position
    ``tree_id`` inside ``self._metagraph`` (whose vertices carry a
    ``"type"`` attribute).  Each step optionally restarts at the root
    (probability ``self._walk_restart``), then selects the next id among
    the successors of ``tree_id`` whose type the current node has
    neighbours of.  With probability ``self._alpha`` the choice is
    history-weighted and, after the move, the position is resampled among
    all ids of the new type ("spacey out"); otherwise the id is chosen
    uniformly and no resampling happens.

    Args:
        root_node: node the walk starts from (also the restart target).

    Returns:
        A list of visited nodes converted with ``str``, beginning with the
        root.  An empty list is returned when the root's type is not in
        ``self._metatree_type_id_dict``.  The walk stops early when no
        admissible next id exists.

    Note:
        ``history`` is only bound for ``_history_position`` values
        ``"local"`` and ``"global"``.
        NOTE(review): an earlier definition with this same name (taking
        ``file_vector``, ``file_dist`` and ``window_size`` as well) appears
        in this file; if both live in the same class this later one
        replaces it -- confirm which is intended.
    """
    start_type = self._net.get_node_type(root_node)
    if start_type not in self._metatree_type_id_dict:
        return []
    start_id = random.choice(self._metatree_type_id_dict[start_type])

    if self._history_position == "local":
        history = np.ones([len(self._metagraph.nodes())], dtype=np.float64)
    elif self._history_position == "global":
        history = self._history

    node, node_type, tree_id = root_node, start_type, start_id
    walk = [str(node)]
    for _step in range(self._walk_length):
        # Optional restart, then decide whether this step is history-guided.
        if random.random() < self._walk_restart:
            node, node_type, tree_id = root_node, start_type, start_id
        history_guided = random.random() < self._alpha

        reachable_types = self._adj_lookupdict[node].keys()
        candidates = [
            i for i in self._metagraph[tree_id]
            if self._metagraph.nodes[i]["type"] in reachable_types
        ]
        if not candidates:
            break

        if not history_guided:
            tree_id = random.choice(candidates)
        elif len(candidates) == 1:
            tree_id = candidates[0]
        else:
            tree_id = utils.unigram_sample(population=candidates,
                                           size=1,
                                           weight=history[candidates])[0]
        node_type = self._metagraph.nodes[tree_id]["type"]
        node = random.choice(self._adj_lookupdict[node][node_type])
        history[tree_id] += 1

        if history_guided:
            # spacey out: re-pick the position among ids of this type
            same_type_ids = self._metatree_type_id_dict[node_type]
            tree_id = utils.unigram_sample(population=same_type_ids,
                                           size=1,
                                           weight=history[same_type_ids])[0]
        walk.append(str(node))
    return walk