Example #1
0
 def _generate(self, g, eids, canonical_etype):
     """Sample random negative destinations for each positive edge.

     Every edge in ``eids`` keeps its true source. Replacement destinations
     are drawn with ``F.randint`` from the entries of ``self.type_set`` that
     belong to the node type of the edge's true destination, so only the
     tail side is corrupted (tail-batch).

     Known limitations, kept as in the original implementation: positives
     (including the original destination) are not filtered out of the
     negatives, no subsampling weight is produced, and the sampling side
     does not depend on a mode.

     Args:
         g: graph object exposing ``find_edges(eids, etype=...)``.
         eids: tensor of edge IDs to corrupt.
         canonical_etype: edge type forwarded to ``g.find_edges``.

     Returns:
         ``((src, dst), etype)`` where ``src`` and ``etype`` are the edge
         sources and the ``self.etype_dict`` entries of ``eids``, each
         repeated ``self.k`` times, and ``dst`` is the flat tensor of
         sampled destinations (empty when ``eids`` yields no edges).
     """
     dtype = F.dtype(eids)
     ctx = F.context(eids)
     # Look up the endpoints of the positive edges.
     src, true_dst = g.find_edges(eids, etype=canonical_etype)
     etype = self.etype_dict[eids]
     src = F.repeat(src, self.k, 0)
     etype = F.repeat(etype, self.k, 0)

     sampled = []
     for node in true_dst:
         # ``node`` is a single element, so ``numel()`` would always be 1;
         # the node ID itself is what selects the node type.
         ntype = self.ntype_dict[node.item()]
         node_set = self.type_set[ntype]
         # NOTE(review): 2 * k destinations are drawn per edge while src and
         # etype are repeated only k times -- confirm the caller expects this.
         picks = F.randint((1, 2 * self.k), dtype, ctx, 0, len(node_set))
         sampled.append(node_set[picks])

     if not sampled:
         # No positive edges: return an empty destination tensor instead of
         # failing on a missing accumulator.
         return (src, torch.empty((0,), dtype=dtype, device=ctx)), etype
     # A single concatenation avoids re-copying the accumulator every step.
     dsts = torch.cat(sampled, dim=1)
     return (src, dsts.squeeze(dim=0)), etype
Example #2
0
 def _generate(self, g, eids, canonical_etype):
     """Pair every positive edge's source with ``self.k`` random destinations.

     The sources of ``eids`` are repeated ``self.k`` times. The destinations
     are drawn with ``F.randint`` using the bounds ``0`` and
     ``g.number_of_nodes(vtype)``, where ``vtype`` is the last element of
     ``canonical_etype``.

     Args:
         g: graph object exposing ``find_edges`` and ``number_of_nodes``.
         eids: tensor of positive edge IDs.
         canonical_etype: 3-tuple whose last element is the destination
             node type.

     Returns:
         ``(src, dst)``: the repeated sources and a tensor of
         ``len(eids) * self.k`` sampled destination IDs.
     """
     _utype, _relation, dst_ntype = canonical_etype
     num_negatives = F.shape(eids)[0] * self.k
     id_dtype = F.dtype(eids)
     device = F.context(eids)

     heads, _ = g.find_edges(eids, etype=canonical_etype)
     repeated_heads = F.repeat(heads, self.k, 0)
     negative_tails = F.randint(
         (num_negatives,),
         id_dtype,
         device,
         0,
         g.number_of_nodes(dst_ntype),
     )
     return repeated_heads, negative_tails
Example #3
0
    def negative_sampling(self, nsrcs, ndsts, node_limit, node_set):
        """Draw filtered tail negatives and subsampling weights for positive pairs.

        For every positive ``(src, dst)`` pair, ``self.neg_num`` node IDs are
        sampled with ``numpy.random.randint(node_limit, ...)``; candidates
        listed in ``self.true_tuple[src]`` are rejected and sampling repeats
        until enough negatives have been accepted. Only the tail side is
        corrupted.

        Args:
            nsrcs: sequence of positive source node IDs.
            ndsts: sequence of positive destination node IDs, aligned with
                ``nsrcs``.
            node_limit: upper bound handed to ``numpy.random.randint``.
            node_set: set that is updated in place with every returned
                negative node ID.

        Returns:
            ``(pos_src, dsts, subsampling_w)``: three 1-D tensors of length
            ``len(nsrcs) * self.neg_num`` holding the repeated sources, the
            sampled negative destinations, and the per-pair weight
            ``sqrt(1 / (count[src] + count[-dst - 1]))`` repeated for each
            of that pair's negatives.
        """
        weights = []
        per_source_negatives = []
        for index, src in enumerate(nsrcs):
            frequency = self.count[src] + self.count[-ndsts[index] - 1]
            # Exactly one weight per positive pair. Appending inside the
            # rejection loop below would add one weight per sampling round
            # and misalign the weights with pos_src / dsts.
            weights.append(torch.sqrt(1 / torch.Tensor([frequency])))

            accepted = []
            accepted_size = 0
            while accepted_size < self.neg_num:
                # TODO: sampling is global; relation-aware sampling would
                # need to take the metapath into account.
                candidates = numpy.random.randint(
                    node_limit, size=self.neg_num * 2)
                # Reject known positives. The candidates may contain
                # duplicates, so uniqueness must not be assumed.
                keep = numpy.isin(
                    candidates, self.true_tuple[src], invert=True)
                candidates = candidates[keep]
                accepted.append(candidates)
                accepted_size += candidates.size
            per_source_negatives.append(
                numpy.concatenate(accepted)[:self.neg_num])

        if per_source_negatives:
            neg_sample = numpy.concatenate(per_source_negatives)
            subsampling_w = torch.cat(weights, dim=-1)
        else:
            # No positive pairs: return empty tensors instead of failing.
            neg_sample = numpy.empty(0, dtype=numpy.int64)
            subsampling_w = torch.empty(0)

        # Element-wise repetition keeps each source / weight next to its own
        # block of neg_num negatives.
        subsampling_w = torch.repeat_interleave(
            subsampling_w, self.neg_num, dim=0)
        pos_src = torch.repeat_interleave(
            torch.tensor(nsrcs, dtype=torch.long), self.neg_num, dim=0)
        dsts = torch.from_numpy(neg_sample)
        node_set.update(neg_sample.tolist())
        return pos_src, dsts, subsampling_w
Example #4
0
 def _generate(self, g, eids, canonical_etype):
     """Sample ``self.k`` negative destinations per edge using ``self.p``.

     The sources of ``eids`` are repeated ``self.k`` times. Destinations are
     drawn with replacement by ``np.random.choice`` from
     ``np.arange(0, g.number_of_nodes())`` with probabilities ``self.p``,
     then converted to a tensor with the dtype and device of ``eids``.

     Args:
         g: graph object exposing ``find_edges`` and ``number_of_nodes``.
         eids: tensor of positive edge IDs.
         canonical_etype: 3-element edge type forwarded to ``g.find_edges``.

     Returns:
         ``(src, dst)``: the repeated sources and the sampled destinations.
     """
     # The individual parts are unused; unpacking still requires exactly
     # three elements.
     _utype, _relation, _vtype = canonical_etype
     num_negatives = F.shape(eids)[0] * self.k
     id_dtype = F.dtype(eids)
     device = F.context(eids)

     heads, _ = g.find_edges(eids, etype=canonical_etype)
     heads = F.repeat(heads, self.k, 0)

     candidates = np.arange(0, g.number_of_nodes())
     drawn = np.random.choice(
         candidates, (num_negatives,), replace=True, p=self.p)
     return heads, th.tensor(drawn, dtype=id_dtype, device=device)
Example #5
0
    def _generate(self, g, eids, canonical_etype):
        """Sample filtered negatives and subsampling weights for positive edges.

        For every edge in ``eids``, ``self.k`` node IDs are sampled with
        ``numpy.random.randint`` and known positives are rejected until
        enough negatives have been accepted. ``self.mode`` selects which
        side is corrupted:

        * ``'head-batch'``: sources are replaced; candidates found in
          ``self.true_head[(rel, dst)]`` are rejected.
        * ``'tail-batch'``: destinations are replaced; candidates found in
          ``self.true_tail[(src, rel)]`` are rejected.

        Args:
            g: graph object exposing ``find_edges`` and ``number_of_nodes``.
            eids: tensor of positive edge IDs; also used to index
                ``self.etype_dict``.
            canonical_etype: ``(utype, etype, vtype)`` triple.

        Returns:
            ``((srcs, dsts), etype, subsampling_w)``. The untouched side,
            the relation IDs and the per-edge weight
            ``sqrt(1 / (count[(src, rel)] + count[(dst, -rel - 1)]))`` are
            each repeated ``self.k`` times; the corrupted side holds the
            sampled negatives.

        Raises:
            ValueError: if ``self.mode`` is neither ``'head-batch'`` nor
                ``'tail-batch'``.
        """
        if self.mode not in ('head-batch', 'tail-batch'):
            raise ValueError('Training batch mode %s not supported' % self.mode)
        head_batch = self.mode == 'head-batch'

        srcs, dsts = g.find_edges(eids, etype=canonical_etype)
        nsrcs, ndsts = srcs.numpy(), dsts.numpy()
        etype = self.etype_dict[eids]
        netype = etype.numpy()
        utype, _, vtype = canonical_etype
        # Corrupted heads replace source nodes, so they are drawn from the
        # source node type; corrupted tails come from the destination type.
        num_candidates = g.number_of_nodes(utype if head_batch else vtype)

        weights = []
        per_edge_negatives = []
        for index, src in enumerate(nsrcs):
            rel = netype[index]
            dst = ndsts[index]
            frequency = self.count[(src, rel)] + self.count[(dst, -rel - 1)]
            # Exactly one weight per positive edge. Appending inside the
            # rejection loop below would add one weight per sampling round
            # and misalign the weights with the returned edges.
            weights.append(torch.sqrt(1 / torch.Tensor([frequency])))

            if head_batch:
                known_true = self.true_head[(rel, dst)]
            else:
                known_true = self.true_tail[(src, rel)]

            accepted = []
            accepted_size = 0
            while accepted_size < self.k:
                candidates = numpy.random.randint(
                    num_candidates, size=self.k * 2)
                # Reject known positives. The candidates may contain
                # duplicates, so uniqueness must not be assumed.
                keep = numpy.isin(candidates, known_true, invert=True)
                candidates = candidates[keep]
                accepted.append(candidates)
                accepted_size += candidates.size
            per_edge_negatives.append(numpy.concatenate(accepted)[:self.k])

        if per_edge_negatives:
            negatives = torch.from_numpy(numpy.concatenate(per_edge_negatives))
            subsampling_w = torch.cat(weights, dim=-1)
        else:
            # No positive edges: return empty tensors instead of failing.
            negatives = torch.empty(0, dtype=torch.long)
            subsampling_w = torch.empty(0)

        if head_batch:
            dsts = F.repeat(dsts, self.k, 0)
            srcs = negatives
        else:
            srcs = F.repeat(srcs, self.k, 0)
            dsts = negatives
        etype = F.repeat(etype, self.k, 0)
        subsampling_w = F.repeat(subsampling_w, self.k, 0)
        return (srcs, dsts), etype, subsampling_w