def _generate(self, g, eids, canonical_etype):
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    # find the source nodes of the positive edges
    src, _dst = g.find_edges(eids, etype=canonical_etype)
    etype = self.etype_dict[eids]
    src = F.repeat(src, self.k, 0)
    etype = F.repeat(etype, self.k, 0)
    dsts = None
    # negative dst nodes must be drawn from the node set of the matching node type.
    # Known limitations of this sampler:
    #   - positive triples are not filtered out of the negative samples, and the
    #     true dst is not removed from the candidate set;
    #   - no subsampling weight is generated for the edge;
    #   - only the true src is kept and the dst is corrupted (tail-batch);
    #     positive/negative sampling should follow the corruption mode instead.
    # These issues are corrected in UniformBaseOnTriples.
    for i in _dst:
        nid = i.item()  # node id of the true dst
        ntype = self.ntype_dict[nid]
        node_set = self.type_set[ntype]
        node_limit = len(node_set)
        # uniform sampling: draw 2 * k candidate indices into the node set
        dst = F.randint((1, 2 * self.k), dtype, ctx, 0, node_limit)
        dst = node_set[dst]
        if dsts is None:
            dsts = dst
        else:
            dsts = torch.cat((dsts, dst), dim=1)
    return (src, dsts.squeeze(dim=0)), etype
def _generate(self, g, eids, canonical_etype):
    # uniform negative sampling: keep the true src and corrupt the dst of
    # each edge with k nodes drawn uniformly from the dst node type
    _, _, vtype = canonical_etype
    shape = F.shape(eids)
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    shape = (shape[0] * self.k,)
    src, _ = g.find_edges(eids, etype=canonical_etype)
    src = F.repeat(src, self.k, 0)
    dst = F.randint(shape, dtype, ctx, 0, g.number_of_nodes(vtype))
    return src, dst
def _generate(self, g, eids, canonical_etype):
    # negative sampling with a user-provided probability distribution self.p
    # over all nodes instead of a uniform draw
    _, _, vtype = canonical_etype
    shape = F.shape(eids)
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    shape = (shape[0] * self.k,)
    src, _ = g.find_edges(eids, etype=canonical_etype)
    src = F.repeat(src, self.k, 0)
    dst = np.random.choice(np.arange(0, g.number_of_nodes()), shape, replace=True, p=self.p)
    # dst = F.randint(shape, dtype, ctx, 0, g.number_of_nodes(vtype))
    dst = th.tensor(dst, dtype=dtype, device=ctx)
    return src, dst
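# A minimal sketch of how a _generate method like the ones above is usually
# wrapped into a negative-sampler object, modeled on
# dgl.dataloading.negative_sampler.Uniform. The wrapper class and its name are
# assumptions for illustration only; the key point is that __call__(g, eids)
# dispatches per canonical edge type before calling _generate.
from collections.abc import Mapping

import dgl.backend as F


class UniformTailNegativeSampler(object):  # hypothetical wrapper, not from the source
    def __init__(self, k):
        self.k = k  # number of negative edges per positive edge

    def _generate(self, g, eids, canonical_etype):
        # same logic as the uniform variant above
        _, _, vtype = canonical_etype
        shape = (F.shape(eids)[0] * self.k,)
        src, _ = g.find_edges(eids, etype=canonical_etype)
        src = F.repeat(src, self.k, 0)
        dst = F.randint(shape, F.dtype(eids), F.context(eids), 0,
                        g.number_of_nodes(vtype))
        return src, dst

    def __call__(self, g, eids):
        if isinstance(eids, Mapping):
            # heterogeneous case: eids is a dict keyed by canonical edge type
            return {etype: self._generate(g, e, etype) for etype, e in eids.items()}
        return self._generate(g, eids, g.canonical_etypes[0])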
def extract_edge_with_id_edge(g):
    """Input a homogeneous graph whose edata['_TYPE'] stores the edge type.

    Return a list with one (edge_index, values) pair per edge type, where
    edge_index has shape [2, num_edges_of_that_type], plus one extra pair
    holding identity (self-loop) edges on every node.
    """
    edges = g.edges()
    edata = g.edata['_TYPE']
    num_edge_type = th.max(edata).item()
    ctx = F.context(edges[0])
    dtype = F.dtype(edges[0])
    A = []
    for i in range(num_edge_type + 1):
        index = th.nonzero(edata == i).squeeze()
        e_0 = edges[0][index]
        e_1 = edges[1][index]
        # edges is a (src, dst) tuple; stack the two into a [2, num_edges] tensor
        e = th.stack((e_0, e_1), dim=0)
        values = th.ones(e.shape[1], device=ctx)
        A.append((e, values))
    # append identity edges: one self-loop per node
    x = th.arange(0, g.num_nodes(), dtype=dtype, device=ctx)
    id_edge = th.stack((x, x), dim=0)
    values = th.ones(id_edge.shape[1], device=ctx)
    A.append((id_edge, values))
    return A
def extract_mtx_with_id_edge(g):
    """Input a homogeneous graph whose edata['_TYPE'] stores the edge type.

    Return a dense tensor of shape [num_edge_types + 1, num_nodes, num_nodes]
    holding one adjacency matrix per edge type plus an identity matrix.
    """
    edges = g.edges()
    edata = g.edata['_TYPE']
    num_edge_type = th.max(edata).item()
    ctx = F.context(edges[0])
    dtype = F.dtype(edges[0])
    A = []
    num_nodes = g.num_nodes()
    for i in range(num_edge_type + 1):
        index = th.nonzero(edata == i).squeeze()
        e_0 = edges[0][index].to('cpu').numpy()
        e_1 = edges[1][index].to('cpu').numpy()
        values = np.ones(e_0.shape[0])
        # build the adjacency matrix of edge type i (coo_matrix comes from scipy.sparse)
        m = coo_matrix((values, (e_0, e_1)), shape=(num_nodes, num_nodes))
        m = th.from_numpy(m.todense()).type(th.FloatTensor).unsqueeze(0)
        if 0 == i:
            A = m
        else:
            A = th.cat([A, m], dim=0)
    # append an identity matrix for the self-loop relation
    m = th.eye(num_nodes).unsqueeze(0)
    A = th.cat([A, m], dim=0)
    return A.to(ctx)
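# A minimal usage sketch (an assumption, not taken from the source): both
# extractors expect a homogeneous graph that carries the '_TYPE' edge feature,
# which is what dgl.to_homogeneous produces from a heterogeneous graph in
# recent DGL releases. The toy graph below is illustrative only.
import dgl
import torch as th

hg = dgl.heterograph({
    ('user', 'follows', 'user'): (th.tensor([0, 1]), th.tensor([1, 2])),
    ('user', 'clicks', 'item'): (th.tensor([0, 2]), th.tensor([0, 1])),
})
g = dgl.to_homogeneous(hg)                  # ndata/edata now include '_TYPE'
edge_lists = extract_edge_with_id_edge(g)   # list of (edge_index, values) per relation
adj_stack = extract_mtx_with_id_edge(g)     # dense [num_etypes + 1, N, N] tensor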
def start(self):
    """Start the service of KVServer."""
    # Connect with all client nodes
    server_ip, server_port = self._addr.split(':')
    _receiver_wait(self._receiver, server_ip, int(server_port), self._client_count)
    # Receive client addresses and assign an ID to each client
    addr_list = []
    for i in range(self._client_count):
        msg = _recv_kv_msg(self._receiver)
        assert msg.type == KVMsgType.IP_ID
        addr_list.append(msg.name)
    self._sort_addr(addr_list)
    for ID in range(len(addr_list)):
        self._client_namebook[ID] = addr_list[ID]
    _network_wait()
    for ID, addr in self._client_namebook.items():
        client_ip, client_port = addr.split(':')
        _add_receiver_addr(self._sender, client_ip, int(client_port), ID)
    _sender_connect(self._sender)
    if self._server_id == 0:
        # Assign IDs to client nodes
        for client_id, addr in self._client_namebook.items():
            msg = KVStoreMsg(
                type=KVMsgType.IP_ID,
                rank=self._server_id,
                name=str(client_id),
                id=None,
                data=None)
            _send_kv_msg(self._sender, msg, client_id)
        # Send serialized shared-memory tensor information to clients
        shared_tensor = ''
        for name in self._has_data:
            shared_tensor += self._serialize_shared_tensor(
                name,
                F.shape(self._data_store[name]),
                F.dtype(self._data_store[name]))
            shared_tensor += '|'
        msg = KVStoreMsg(
            type=KVMsgType.IP_ID,
            rank=self._server_id,
            name=shared_tensor,
            id=None,
            data=None)
        for client_id in range(len(self._client_namebook)):
            _send_kv_msg(self._sender, msg, client_id)
    # Service loop
    while True:
        msg = _recv_kv_msg(self._receiver)
        # PUSH message
        if msg.type == KVMsgType.PUSH:
            # map global IDs to local IDs if a g2l mapping exists for this tensor
            if msg.name + '-g2l-' in self._has_data:
                local_id = self._data_store[msg.name + '-g2l-'][msg.id]
            else:
                local_id = msg.id
            self._push_handler(msg.name + '-data-', local_id, msg.data, self._data_store)
        # PULL message
        elif msg.type == KVMsgType.PULL:
            if msg.name + '-g2l-' in self._has_data:
                local_id = self._data_store[msg.name + '-g2l-'][msg.id]
            else:
                local_id = msg.id
            res_tensor = self._pull_handler(msg.name + '-data-', local_id, self._data_store)
            back_msg = KVStoreMsg(
                type=KVMsgType.PULL_BACK,
                rank=self._server_id,
                name=msg.name,
                id=msg.id,
                data=res_tensor)
            _send_kv_msg(self._sender, back_msg, msg.rank)
        # Barrier message
        elif msg.type == KVMsgType.BARRIER:
            self._barrier_count += 1
            if self._barrier_count == self._client_count:
                back_msg = KVStoreMsg(
                    type=KVMsgType.BARRIER,
                    rank=self._server_id,
                    name=None,
                    id=None,
                    data=None)
                for i in range(self._client_count):
                    _send_kv_msg(self._sender, back_msg, i)
                self._barrier_count = 0
        # FINAL message
        elif msg.type == KVMsgType.FINAL:
            print("Exit KVStore service, server ID: %d" % self._server_id)
            break  # exit service loop
        else:
            raise RuntimeError('Unknown type of kvstore message: %d' % msg.type.value)
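# A minimal sketch (an assumption, not taken from the source) of the handler
# interface the service loop above calls into: _push_handler writes rows of a
# named tensor in the shared data store, and _pull_handler reads them back.
class _DefaultKVHandlers(object):  # hypothetical mixin for illustration only
    def _push_handler(self, name, local_id, data, target):
        # overwrite the selected rows of the stored tensor with the pushed data
        target[name][local_id] = data

    def _pull_handler(self, name, local_id, target):
        # return the selected rows of the stored tensor
        return target[name][local_id]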
def _collate_with_negative_sampling(self, items):
    if isinstance(items[0], tuple):
        # returns a list of pairs: group them by node types into a dict
        items = utils.group_as_dict(items)
        items = utils.prepare_tensor_dict(self.g_sampling, items, 'items')
    else:
        items = utils.prepare_tensor(self.g_sampling, items, 'items')

    pair_graph = self.g.edge_subgraph(items, preserve_nodes=True)
    induced_edges = pair_graph.edata[EID]

    neg_srcdst, edge_type, subsampling_w = self.negative_sampler(self.g, items)
    # neg_srcdst, edge_type = self.negative_sampler(self.g, items)
    # neg_srcdst = self.negative_sampler(self.g, items)
    if not isinstance(neg_srcdst, Mapping):
        assert len(self.g.etypes) == 1, \
            'graph has multiple or no edge types; ' \
            'please return a dict in negative sampler.'
        neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
    # get dtype from a tuple of tensors
    dtype = F.dtype(list(neg_srcdst.values())[0][0])
    neg_edges = {
        etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
        for etype in self.g.canonical_etypes}
    neg_pair_graph = heterograph(
        neg_edges,
        {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})

    pair_graph, neg_pair_graph = transform.compact_graphs(
        [pair_graph, neg_pair_graph])
    pair_graph.edata[EID] = induced_edges

    # attach the relation id ('etype') and subsampling weight ('sw') of every
    # negative edge to the negative pair graph
    num = 0
    for canonical_etype in neg_pair_graph.canonical_etypes:
        if isinstance(edge_type, dict):
            neg_pair_graph.edata[canonical_etype]['etype'] = edge_type[canonical_etype]
            neg_pair_graph.edata[canonical_etype]['sw'] = subsampling_w
        else:
            neg_pair_graph.edata['etype'] = edge_type[
                num:num + neg_pair_graph.number_of_edges(canonical_etype)]
            neg_pair_graph.edata['sw'] = subsampling_w[
                num:num + neg_pair_graph.number_of_edges(canonical_etype)]
        num += neg_pair_graph.number_of_edges(canonical_etype)

    seed_nodes = pair_graph.ndata[NID]

    exclude_eids = _find_exclude_eids(
        self.g, self.exclude, items,
        reverse_eid_map=self.reverse_eids,
        reverse_etype_map=self.reverse_etypes)

    blocks = self.block_sampler.sample_blocks(
        self.g_sampling, seed_nodes, exclude_eids=exclude_eids)
    input_nodes = blocks[0].srcdata[NID]

    return input_nodes, pair_graph, neg_pair_graph, blocks
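# A minimal sketch (assumptions throughout) of the callable interface this
# collator expects from self.negative_sampler: it must return a triple
# (neg_srcdst, edge_type, subsampling_weight), where neg_srcdst is either a
# (src, dst) pair or a dict keyed by canonical edge type. The class below only
# covers the single-edge-type case and corrupts tails uniformly; its name, the
# stored 'etype' edge feature, and the constant weights are illustrative, not
# from the source.
import torch as th


class TripleNegativeSampler(object):  # hypothetical example
    def __init__(self, k, num_nodes):
        self.k = k
        self.num_nodes = num_nodes

    def __call__(self, g, eids):
        src, _ = g.find_edges(eids)
        src = src.repeat_interleave(self.k)
        # corrupt the tail of every positive edge with k uniform random nodes
        neg_dst = th.randint(0, self.num_nodes, (src.shape[0],))
        # relation id of each negative edge; here an 'etype' edge feature on
        # the graph is assumed
        edge_type = g.edata['etype'][eids].repeat_interleave(self.k)
        subsampling_w = th.ones(src.shape[0])
        return (src, neg_dst), edge_type, subsampling_w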