def _generate(self, g, eids, canonical_etype):
    """Sample type-constrained negative destination nodes for a batch of edges.

    For every positive edge, the node id of its true destination is looked up
    in ``self.ntype_dict``; the result selects a candidate pool from
    ``self.type_set``, and ``2 * self.k`` candidates are drawn from that pool
    uniformly with replacement.

    Parameters
    ----------
    g : graph
        Graph queried through ``g.find_edges``.
    eids : Tensor
        IDs of the positive edges.
    canonical_etype : tuple
        Canonical edge type forwarded to ``g.find_edges``.

    Returns
    -------
    tuple
        ``((src, dst), etype)``. ``src`` and ``etype`` are the positive source
        nodes / edge types, each repeated ``self.k`` times along axis 0.
        ``dst`` is the flat tensor of sampled destinations (empty when
        ``eids`` is empty).

    Notes
    -----
    Known limitations carried over from the original author's notes:

    * true positives (including the original destination itself) are not
      filtered out of the negative samples;
    * no subsampling weight is produced for the edge;
    * only the "tail batch" case (true source + negative destination) is
      generated, regardless of any sampling mode;
    * these points are said to be addressed in ``UniformBaseOnTriples``.
    """
    dtype = F.dtype(eids)
    ctx = F.context(eids)

    # Endpoints of the positive edges; only the destination's node type is
    # needed to choose the candidate pool.
    src, true_dst = g.find_edges(eids, etype=canonical_etype)
    etype = self.etype_dict[eids]
    src = F.repeat(src, self.k, 0)
    etype = F.repeat(etype, self.k, 0)

    # NOTE(review): each edge receives 2 * self.k destinations while src/etype
    # are repeated only self.k times, so the lengths differ by a factor of
    # two -- confirm this is what the consumer expects.
    sampled = []
    for node in true_dst:
        # ``node`` is a 0-d tensor; ``.item()`` yields the node id itself.
        # (``.numel()`` would always be 1 and pick the same type every time.)
        nid = node.item()
        candidates = self.type_set[self.ntype_dict[nid]]
        # Uniform sampling with replacement over the candidate pool.
        picks = F.randint((1, 2 * self.k), dtype, ctx, 0, len(candidates))
        sampled.append(candidates[picks])

    if sampled:
        # Concatenate once instead of growing the tensor inside the loop.
        dsts = torch.cat(sampled, dim=1).squeeze(dim=0)
    else:
        # No positive edges: return an empty destination tensor.
        dsts = torch.empty((0,), dtype=dtype, device=ctx)
    return (src, dsts), etype
def _generate(self, g, eids, canonical_etype):
    """Pair each positive edge's source with ``self.k`` random destinations.

    Parameters
    ----------
    g : graph
        Graph providing ``find_edges`` and ``number_of_nodes``.
    eids : Tensor
        IDs of the positive edges.
    canonical_etype : tuple
        Three-element canonical edge type; its last element is the node type
        passed to ``g.number_of_nodes``.

    Returns
    -------
    tuple
        ``(src, dst)``: the positive source nodes repeated ``self.k`` times
        along axis 0, and the same number of destination ids drawn with
        ``F.randint`` between 0 and ``g.number_of_nodes(vtype)``.
    """
    _, _, dst_ntype = canonical_etype
    num_negatives = F.shape(eids)[0] * self.k
    id_dtype = F.dtype(eids)
    device = F.context(eids)

    # Only the source endpoint of each positive edge is kept.
    heads, _ = g.find_edges(eids, etype=canonical_etype)
    heads = F.repeat(heads, self.k, 0)

    upper = g.number_of_nodes(dst_ntype)
    tails = F.randint((num_negatives,), id_dtype, device, 0, upper)
    return heads, tails
def _generate(self, g, eids, canonical_etype):
    """Pair each positive edge's source with ``self.k`` weighted-random destinations.

    Destinations are drawn with replacement by ``np.random.choice`` over the
    ids ``0 .. g.number_of_nodes() - 1`` using the probabilities ``self.p``.

    Parameters
    ----------
    g : graph
        Graph providing ``find_edges`` and ``number_of_nodes``.
    eids : Tensor
        IDs of the positive edges.
    canonical_etype : tuple
        Three-element canonical edge type, forwarded to ``g.find_edges``.

    Returns
    -------
    tuple
        ``(src, dst)``: the positive source nodes repeated ``self.k`` times
        along axis 0, and a tensor of sampled destination ids with the dtype
        and device of ``eids``.
    """
    # The destination node type is unpacked but not used below.
    # NOTE(review): the population is all nodes of ``g`` rather than only the
    # nodes of that type -- confirm this matches how ``self.p`` is built.
    _, _, dst_ntype = canonical_etype
    num_negatives = F.shape(eids)[0] * self.k
    id_dtype = F.dtype(eids)
    device = F.context(eids)

    heads, _ = g.find_edges(eids, etype=canonical_etype)
    heads = F.repeat(heads, self.k, 0)

    population = np.arange(0, g.number_of_nodes())
    drawn = np.random.choice(population, (num_negatives,), replace=True, p=self.p)
    tails = th.tensor(drawn, dtype=id_dtype, device=device)
    return heads, tails
def extract_edge_with_id_edge(g):
    """Split the edges of a homogeneous graph by edge type and add self-loops.

    Parameters
    ----------
    g : graph
        Graph exposing ``edges()`` (source and destination id tensors),
        ``edata['_TYPE']`` (integer edge-type id per edge) and ``num_nodes()``.

    Returns
    -------
    list of (Tensor, Tensor)
        One ``(edge_index, values)`` pair per edge type ``0 .. max_type`` in
        order, followed by one pair for the identity (self-loop) edges.
        ``edge_index`` has shape ``[2, n]`` (row 0 sources, row 1
        destinations) and ``values`` is a vector of ``n`` ones. A type with
        no edges yields an ``edge_index`` of shape ``[2, 0]``; a graph with no
        edges at all yields only the identity pair.
    """
    edges = g.edges()
    src, dst = edges[0], edges[1]
    etypes = g.edata['_TYPE']
    device = src.device

    # th.max raises on an empty tensor, so a graph without edges has 0 types.
    n_types = int(th.max(etypes).item()) + 1 if etypes.numel() > 0 else 0

    A = []
    for t in range(n_types):
        # Boolean-mask indexing always returns a 1-D tensor. The previous
        # nonzero(...).squeeze() collapsed to 0-d when a type had exactly one
        # edge, which broke the [2, n] shape.
        mask = etypes == t
        e = th.stack((src[mask], dst[mask]), dim=0)
        A.append((e, th.ones(e.shape[1], device=device)))

    # Identity edges: every node connected to itself.
    x = th.arange(0, g.num_nodes(), dtype=src.dtype, device=device)
    id_edge = th.stack((x, x), dim=0)
    A.append((id_edge, th.ones(id_edge.shape[1], device=device)))
    return A
def extract_mtx_with_id_edge(g):
    """Build one dense adjacency matrix per edge type, plus an identity matrix.

    Parameters
    ----------
    g : graph
        Graph exposing ``edges()`` (source and destination id tensors),
        ``edata['_TYPE']`` (integer edge-type id per edge) and ``num_nodes()``.

    Returns
    -------
    Tensor
        Float32 tensor of shape ``[num_edge_types + 1, num_nodes, num_nodes]``
        on the device of the edge tensors. Slice ``t`` is the adjacency matrix
        of edge type ``t`` (rows are sources, columns destinations); parallel
        edges accumulate because the sparse matrix sums duplicate entries
        when densified. The last slice is the identity matrix.
    """
    edges = g.edges()
    src, dst = edges[0], edges[1]
    etypes = g.edata['_TYPE']
    device = src.device
    num_nodes = g.num_nodes()

    # th.max raises on an empty tensor, so a graph without edges has 0 types.
    n_types = int(th.max(etypes).item()) + 1 if etypes.numel() > 0 else 0

    slices = []
    for t in range(n_types):
        # Boolean-mask indexing stays 1-D even for a single matching edge,
        # unlike nonzero(...).squeeze(), which collapsed to a 0-d index.
        mask = etypes == t
        rows = src[mask].to('cpu').numpy()
        cols = dst[mask].to('cpu').numpy()
        values = np.ones(rows.shape[0])
        m = coo_matrix((values, (rows, cols)), shape=(num_nodes, num_nodes))
        slices.append(th.from_numpy(m.toarray()).to(th.float32).unsqueeze(0))

    slices.append(th.eye(num_nodes).unsqueeze(0))
    # Concatenate once rather than re-allocating the stack on every type.
    return th.cat(slices, dim=0).to(device)
def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False,
                      neg_deg_sample=False):
    """Score the negative edges described by ``neg_g``.

    Depending on ``neg_g.neg_head``, either the head or the tail of every
    positive edge is replaced by negative entities; the untouched endpoint
    and the relation embedding come from ``pos_g``.

    Parameters
    ----------
    pos_g : DGLGraph
        Graph holding positive edges; reads ``ndata['emb']``,
        ``edata['emb']`` and ``edata['id']``.
    neg_g : DGLGraph
        Graph holding negative edges; reads ``num_chunks``, ``chunk_size``,
        ``neg_sample_size``, ``neg_head``, ``ndata['id']`` and
        ``head_nid`` / ``tail_nid``.
    to_device : func
        Function to move data into device. Applied only to the ids of the
        endpoint that is kept, and only when ``gpu_id >= 0``.
    gpu_id : int
        Which gpu to move data to.
    trace : bool
        Forwarded to ``self.entity_emb`` and the ``*_neg_prepare`` helpers.
        Default: False
    neg_deg_sample : bool
        If True, the positive endpoints of each chunk are prepended to the
        sampled negatives, and the resulting scores are multiplied by a 0/1
        mask. Default: False

    Returns
    -------
    tensor
        The negative score (masked when ``neg_deg_sample`` is True).

    Notes
    -----
    When ``neg_deg_sample`` is True, ``neg_g.neg_sample_size`` is overwritten
    with ``chunk_size + neg_sample_size``.
    """
    num_chunks = neg_g.num_chunks
    chunk_size = neg_g.chunk_size
    neg_sample_size = neg_g.neg_sample_size
    mask = F.ones((num_chunks, chunk_size * (neg_sample_size + chunk_size)),
                  dtype=F.float32, ctx=F.context(pos_g.ndata['emb']))

    corrupt_head = neg_g.neg_head
    if corrupt_head:
        neg_nid = neg_g.head_nid
    else:
        neg_nid = neg_g.tail_nid
    neg_emb = self.entity_emb(neg_g.ndata['id'][neg_nid], gpu_id, trace)

    # "kept" is the endpoint that stays positive; "replaced" is the endpoint
    # whose embeddings are swapped for negatives.
    head_ids, tail_ids = pos_g.all_edges(order='eid')
    if corrupt_head:
        kept_ids, replaced_ids = tail_ids, head_ids
    else:
        kept_ids, replaced_ids = head_ids, tail_ids
    if to_device is not None and gpu_id >= 0:
        kept_ids = to_device(kept_ids, gpu_id)
    kept_emb = pos_g.ndata['emb'][kept_ids]
    rel = pos_g.edata['emb']

    if neg_deg_sample:
        # The positive endpoints of a chunk are reused as extra negatives for
        # every edge in that chunk. Edge j of a chunk therefore meets its own
        # endpoint at column j; those positions are zeroed in the mask.
        pos_emb = pos_g.ndata['emb'][replaced_ids]
        pos_emb = pos_emb.reshape(num_chunks, chunk_size, -1)
        neg_emb = neg_emb.reshape(num_chunks, neg_sample_size, -1)
        neg_emb = F.cat([pos_emb, neg_emb], 1)
        neg_sample_size = chunk_size + neg_sample_size
        # A stride of (row length + 1) over the flattened
        # chunk_size x neg_sample_size block hits exactly (j, j).
        mask[:, 0::(neg_sample_size + 1)] = 0
        neg_emb = neg_emb.reshape(num_chunks * neg_sample_size, -1)

    edge_ids = pos_g.edata['id']
    if corrupt_head:
        neg_emb, kept_emb = self.head_neg_prepare(edge_ids, num_chunks,
                                                  neg_emb, kept_emb, gpu_id, trace)
        neg_score = self.head_neg_score(neg_emb, rel, kept_emb,
                                        num_chunks, chunk_size, neg_sample_size)
    else:
        kept_emb, neg_emb = self.tail_neg_prepare(edge_ids, num_chunks,
                                                  kept_emb, neg_emb, gpu_id, trace)
        neg_score = self.tail_neg_score(kept_emb, rel, neg_emb,
                                        num_chunks, chunk_size, neg_sample_size)

    if not neg_deg_sample:
        return neg_score

    neg_g.neg_sample_size = neg_sample_size
    mask = mask.reshape(num_chunks, chunk_size, neg_sample_size)
    return neg_score * mask
def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False,
                      neg_deg_sample=False):
    """Compute scores for the negative edges described by ``neg_g``.

    Parameters
    ----------
    pos_g : graph
        Positive-edge graph; reads ``ndata['emb']``, ``edata['emb']`` and
        ``edata['id']``.
    neg_g : graph
        Negative-edge graph; reads ``num_chunks``, ``chunk_size``,
        ``neg_sample_size``, ``neg_head``, ``ndata['id']`` and
        ``head_nid`` / ``tail_nid``.
    to_device : callable, optional
        Called as ``to_device(ids, gpu_id)`` on the ids of the uncorrupted
        endpoint when ``gpu_id >= 0``.
    gpu_id : int
        Forwarded to ``self.entity_emb`` and the ``*_neg_prepare`` helpers.
    trace : bool
        Forwarded to ``self.entity_emb`` and the ``*_neg_prepare`` helpers.
    neg_deg_sample : bool
        If True, the positive endpoints of each chunk are placed in front of
        the sampled negatives and the score is multiplied by a 0/1 mask.

    Returns
    -------
    tensor
        Output of ``head_neg_score`` / ``tail_neg_score``, multiplied by the
        mask when ``neg_deg_sample`` is True.

    Notes
    -----
    With ``neg_deg_sample`` set, ``neg_g.neg_sample_size`` is overwritten
    with ``chunk_size + neg_sample_size``.
    """
    n_chunks = neg_g.num_chunks
    per_chunk = neg_g.chunk_size
    n_neg = neg_g.neg_sample_size
    keep = F.ones((n_chunks, per_chunk * (n_neg + per_chunk)),
                  dtype=F.float32,
                  ctx=F.context(pos_g.ndata['emb']))

    if neg_g.neg_head:
        # Corrupt the head: negatives replace heads, tails stay positive.
        corrupt = self.entity_emb(neg_g.ndata['id'][neg_g.head_nid], gpu_id, trace)
        src_ids, dst_ids = pos_g.all_edges(order='eid')
        if to_device is not None and gpu_id >= 0:
            dst_ids = to_device(dst_ids, gpu_id)
        tail = pos_g.ndata['emb'][dst_ids]
        rel = pos_g.edata['emb']

        if neg_deg_sample:
            # The chunk's own positive heads are reused as extra negatives.
            # Edge j of a chunk meets its own head at column j, so those
            # positions are zeroed in the mask.
            true_head = pos_g.ndata['emb'][src_ids]
            true_head = true_head.reshape(n_chunks, per_chunk, -1)
            corrupt = F.cat([true_head, corrupt.reshape(n_chunks, n_neg, -1)], 1)
            n_neg = per_chunk + n_neg
            # Stride of (row length + 1) over the flattened block hits (j, j).
            keep[:, 0::(n_neg + 1)] = 0
            corrupt = corrupt.reshape(n_chunks * n_neg, -1)

        corrupt, tail = self.head_neg_prepare(pos_g.edata['id'], n_chunks,
                                              corrupt, tail, gpu_id, trace)
        score = self.head_neg_score(corrupt, rel, tail, n_chunks, per_chunk, n_neg)
    else:
        # Corrupt the tail: negatives replace tails, heads stay positive.
        corrupt = self.entity_emb(neg_g.ndata['id'][neg_g.tail_nid], gpu_id, trace)
        src_ids, dst_ids = pos_g.all_edges(order='eid')
        if to_device is not None and gpu_id >= 0:
            src_ids = to_device(src_ids, gpu_id)
        head = pos_g.ndata['emb'][src_ids]
        rel = pos_g.edata['emb']

        if neg_deg_sample:
            # Same construction as the head case, using the positive tails.
            true_tail = pos_g.ndata['emb'][dst_ids]
            true_tail = true_tail.reshape(n_chunks, per_chunk, -1)
            corrupt = F.cat([true_tail, corrupt.reshape(n_chunks, n_neg, -1)], 1)
            n_neg = per_chunk + n_neg
            keep[:, 0::(n_neg + 1)] = 0
            corrupt = corrupt.reshape(n_chunks * n_neg, -1)

        head, corrupt = self.tail_neg_prepare(pos_g.edata['id'], n_chunks,
                                              head, corrupt, gpu_id, trace)
        score = self.tail_neg_score(head, rel, corrupt, n_chunks, per_chunk, n_neg)

    if not neg_deg_sample:
        return score

    # Record the enlarged negative count on the graph.
    neg_g.neg_sample_size = n_neg
    keep = keep.reshape(n_chunks, per_chunk, n_neg)
    return score * keep