Exemple #1
0
 def _generate(self, g, eids, canonical_etype):
     """Build negative (src, dst) pairs for the edges ``eids``.

     The true source of every edge is kept and repeated ``self.k`` times;
     for every true destination, ``2 * self.k`` replacement nodes are drawn
     uniformly (with replacement) from ``self.type_set``.

     Known limitations (carried over from the original notes):
     * true positives are not filtered out of the negative samples, and the
       original destination itself may be drawn again;
     * no subsampling weight is produced for the edges;
     * only the destination is corrupted (true src + negative dst, i.e.
       "tail batch"); head/tail corruption should depend on a mode.
     The notes say these points are addressed in ``UniformBaseOnTriples``.

     Returns
     -------
     tuple
         ``((src, dsts), etype)`` where ``src`` and ``etype`` are repeated
         ``self.k`` times and ``dsts`` holds the sampled destinations.
     """
     dtype = F.dtype(eids)
     ctx = F.context(eids)

     # Look up the endpoints of the positive edges (src = start nodes).
     src, pos_dst = g.find_edges(eids, etype=canonical_etype)
     etype = self.etype_dict[eids]
     src = F.repeat(src, self.k, 0)
     etype = F.repeat(etype, self.k, 0)

     # Negative destinations are picked from the per-type node set.
     neg_dst = None
     for node in pos_dst:
         # NOTE(review): numel() is the element count of `node`, not its
         # value -- confirm this is the intended key into ntype_dict.
         ntype = self.ntype_dict[node.numel()]
         candidates = self.type_set[ntype]
         # Uniform sampling of positions inside the candidate set.
         picks = F.randint((1, 2 * self.k), dtype, ctx, 0, len(candidates))
         sampled = candidates[picks]
         if neg_dst is None:
             neg_dst = sampled
         else:
             neg_dst = torch.cat((neg_dst, sampled), dim=1)
     return (src, neg_dst.squeeze(dim=0)), etype
Exemple #2
0
 def _generate(self, g, eids, canonical_etype):
     """Draw ``self.k`` uniformly random destinations per edge in ``eids``.

     The source node of each edge is repeated ``self.k`` times and paired
     with node IDs drawn via ``F.randint`` from
     ``[0, g.number_of_nodes(vtype))``, where ``vtype`` is the third
     component of ``canonical_etype``.

     Returns
     -------
     tuple
         ``(src, dst)`` with ``F.shape(eids)[0] * self.k`` entries each.
     """
     _, _, dst_ntype = canonical_etype
     num_negatives = F.shape(eids)[0] * self.k

     pos_src, _ = g.find_edges(eids, etype=canonical_etype)
     neg_src = F.repeat(pos_src, self.k, 0)
     neg_dst = F.randint(
         (num_negatives,),
         F.dtype(eids),
         F.context(eids),
         0,
         g.number_of_nodes(dst_ntype),
     )
     return neg_src, neg_dst
Exemple #3
0
 def _generate(self, g, eids, canonical_etype):
     """Draw ``self.k`` destinations per edge according to ``self.p``.

     The source node of each edge is repeated ``self.k`` times. Destinations
     are sampled with replacement by ``np.random.choice`` over
     ``np.arange(0, g.number_of_nodes())`` using the probabilities
     ``self.p``, then converted to a tensor with the dtype and device of
     ``eids``.

     Returns
     -------
     tuple
         ``(src, dst)`` with ``F.shape(eids)[0] * self.k`` entries each.
     """
     # The destination node type is unpacked but not used below: candidates
     # come from g.number_of_nodes() without a node-type argument.
     _, _, vtype = canonical_etype
     num_negatives = F.shape(eids)[0] * self.k
     dtype = F.dtype(eids)
     ctx = F.context(eids)

     pos_src, _ = g.find_edges(eids, etype=canonical_etype)
     neg_src = F.repeat(pos_src, self.k, 0)

     candidates = np.arange(0, g.number_of_nodes())
     sampled = np.random.choice(candidates,
                                (num_negatives, ),
                                replace=True,
                                p=self.p)
     neg_dst = th.tensor(sampled, dtype=dtype, device=ctx)
     return neg_src, neg_dst
Exemple #4
0
def extract_edge_with_id_edge(g):
    """Split the edges of a homogeneous graph by edge type and add self-loops.

    Parameters
    ----------
    g : graph
        Homogeneous graph providing ``edges()`` (a ``(src, dst)`` tuple of
        tensors), ``edata['_TYPE']`` (one type ID per edge) and
        ``num_nodes()``.

    Returns
    -------
    list of (Tensor, Tensor)
        One ``(edge_index, values)`` pair for every type ID in
        ``0 .. max(_TYPE)``, followed by one pair for the identity edges
        ``(v, v)`` of every node. ``edge_index`` has shape
        ``[2, num_edges_of_that_type]`` and ``values`` is a vector of ones
        of matching length. For a graph without edges only the identity
        pair is returned.
    """
    edges = g.edges()  # tuple: (src, dst)
    edata = g.edata['_TYPE']
    ctx = edges[0].device
    dtype = edges[0].dtype
    # th.max raises on an empty tensor, so treat "no edges" as "no types".
    num_edge_types = th.max(edata).item() + 1 if edata.numel() > 0 else 0

    A = []
    for i in range(num_edge_types):
        # A boolean mask keeps the selection 1-D even when exactly one edge
        # has this type; nonzero().squeeze() collapsed that case to a 0-d
        # index and made e.shape[1] fail.
        mask = edata == i
        e = th.stack((edges[0][mask], edges[1][mask]), dim=0)
        values = th.ones(e.shape[1], device=ctx)
        A.append((e, values))

    # Identity (self-loop) edges for every node.
    x = th.arange(0, g.num_nodes(), dtype=dtype, device=ctx)
    id_edge = th.stack((x, x), dim=0)
    values = th.ones(id_edge.shape[1], device=ctx)
    A.append((id_edge, values))
    return A
Exemple #5
0
def extract_mtx_with_id_edge(g):
    """Build one dense adjacency matrix per edge type plus an identity matrix.

    Parameters
    ----------
    g : graph
        Homogeneous graph providing ``edges()`` (a ``(src, dst)`` tuple of
        tensors), ``edata['_TYPE']`` (one type ID per edge) and
        ``num_nodes()``.

    Returns
    -------
    Tensor
        Float tensor of shape ``[max(_TYPE) + 2, num_nodes, num_nodes]`` on
        the device of the edge tensors. Slice ``i`` is the adjacency matrix
        of type ``i`` (row = source, column = destination; duplicate edges
        are summed by ``coo_matrix``), and the last slice is the identity
        matrix. For a graph without edges only the identity slice is
        returned.
    """
    edges = g.edges()  # tuple: (src, dst)
    edata = g.edata['_TYPE']
    ctx = edges[0].device
    num_nodes = g.num_nodes()
    # th.max raises on an empty tensor, so treat "no edges" as "no types".
    num_edge_types = th.max(edata).item() + 1 if edata.numel() > 0 else 0

    matrices = []
    for i in range(num_edge_types):
        # A boolean mask keeps the selection 1-D even when exactly one edge
        # has this type; nonzero().squeeze() collapsed that case to a 0-d
        # index and made e_0.shape[0] fail.
        mask = edata == i
        e_0 = edges[0][mask].to('cpu').numpy()
        e_1 = edges[1][mask].to('cpu').numpy()
        values = np.ones(e_0.shape[0])
        m = coo_matrix((values, (e_0, e_1)), shape=(num_nodes, num_nodes))
        # toarray() yields a plain ndarray (todense() returns np.matrix).
        matrices.append(th.from_numpy(m.toarray()).float())

    matrices.append(th.eye(num_nodes))
    # Stack once instead of re-concatenating the growing tensor per type.
    return th.stack(matrices, dim=0).to(ctx)
Exemple #6
0
    def start(self):
        """Start service of KVServer.

        Runs the connection handshake and then blocks in the service loop:

        1. Wait for ``self._client_count`` clients, collect one ``IP_ID``
           message from each, sort the addresses with ``self._sort_addr``
           and record them in ``self._client_namebook`` keyed by position.
        2. Connect the sender to every client address.
        3. If this is server 0, send every client its ID and then the
           serialized description of every tensor named in
           ``self._has_data``.
        4. Serve ``PUSH`` / ``PULL`` / ``BARRIER`` messages until a
           ``FINAL`` message arrives.

        Raises
        ------
        RuntimeError
            If a message of an unknown type is received in the service loop.
        """

        def to_local_id(request):
            # Translate the request's IDs through the '-g2l-' table when
            # one is registered for this name; otherwise use them as-is.
            g2l_name = request.name + '-g2l-'
            if g2l_name in self._has_data:
                return self._data_store[g2l_name][request.id]
            return request.id

        # Get connected with all client nodes
        server_ip, server_port = self._addr.split(':')
        _receiver_wait(self._receiver, server_ip, int(server_port),
                       self._client_count)

        # Receive each client's address, then assign IDs by sorted position
        addr_list = []
        for _ in range(self._client_count):
            hello = _recv_kv_msg(self._receiver)
            assert hello.type == KVMsgType.IP_ID
            addr_list.append(hello.name)

        self._sort_addr(addr_list)
        for client_id, addr in enumerate(addr_list):
            self._client_namebook[client_id] = addr

        _network_wait()

        for client_id, addr in self._client_namebook.items():
            client_ip, client_port = addr.split(':')
            _add_receiver_addr(self._sender, client_ip, int(client_port),
                               client_id)

        _sender_connect(self._sender)

        if self._server_id == 0:
            # Tell every client which ID it was given
            for client_id in self._client_namebook:
                id_msg = KVStoreMsg(type=KVMsgType.IP_ID,
                                    rank=self._server_id,
                                    name=str(client_id),
                                    id=None,
                                    data=None)
                _send_kv_msg(self._sender, id_msg, client_id)

            # Send serialized shared-memory tensor information to clients;
            # every entry is terminated by '|'
            shared_tensor = ''.join(
                self._serialize_shared_tensor(
                    name, F.shape(self._data_store[name]),
                    F.dtype(self._data_store[name])) + '|'
                for name in self._has_data)

            tensor_msg = KVStoreMsg(type=KVMsgType.IP_ID,
                                    rank=self._server_id,
                                    name=shared_tensor,
                                    id=None,
                                    data=None)

            for client_id in range(len(self._client_namebook)):
                _send_kv_msg(self._sender, tensor_msg, client_id)

        # Service loop
        while True:
            msg = _recv_kv_msg(self._receiver)
            kind = msg.type
            if kind == KVMsgType.PUSH:
                self._push_handler(msg.name + '-data-', to_local_id(msg),
                                   msg.data, self._data_store)
            elif kind == KVMsgType.PULL:
                res_tensor = self._pull_handler(msg.name + '-data-',
                                                to_local_id(msg),
                                                self._data_store)
                # The reply carries the IDs exactly as the client sent them
                reply = KVStoreMsg(type=KVMsgType.PULL_BACK,
                                   rank=self._server_id,
                                   name=msg.name,
                                   id=msg.id,
                                   data=res_tensor)
                _send_kv_msg(self._sender, reply, msg.rank)
            elif kind == KVMsgType.BARRIER:
                self._barrier_count += 1
                # Release all clients once every one of them has arrived
                if self._barrier_count == self._client_count:
                    release = KVStoreMsg(type=KVMsgType.BARRIER,
                                         rank=self._server_id,
                                         name=None,
                                         id=None,
                                         data=None)
                    for client_id in range(self._client_count):
                        _send_kv_msg(self._sender, release, client_id)
                    self._barrier_count = 0
            elif kind == KVMsgType.FINAL:
                print("Exit KVStore service, server ID: %d" % self._server_id)
                break  # exit loop
            else:
                raise RuntimeError('Unknown type of kvstore message: %d' %
                                   msg.type.value)
Exemple #7
0
    def _collate_with_negative_sampling(self, items):
        """Collate a batch of edges together with sampled negative edges.

        Parameters
        ----------
        items : list
            Edge IDs, or (edge type, edge ID) tuples, which are first
            grouped into a dict by ``utils.group_as_dict``.

        Returns
        -------
        tuple
            ``(input_nodes, pair_graph, neg_pair_graph, blocks)``:
            the ``NID`` data of the first block's source nodes, the
            compacted positive-edge subgraph, the compacted negative-edge
            graph (with ``'etype'`` and ``'sw'`` edge data attached) and the
            blocks returned by ``self.block_sampler``.
        """
        if isinstance(items[0], tuple):
            # A list of pairs: group them into a dict first
            items = utils.group_as_dict(items)
            items = utils.prepare_tensor_dict(self.g_sampling, items, 'items')
        else:
            items = utils.prepare_tensor(self.g_sampling, items, 'items')

        pair_graph = self.g.edge_subgraph(items, preserve_nodes=True)
        induced_edges = pair_graph.edata[EID]

        # The sampler returns the negative pairs, their edge types and the
        # subsampling weights
        neg_srcdst, edge_type, subsampling_w = self.negative_sampler(
            self.g, items)
        if not isinstance(neg_srcdst, Mapping):
            assert len(self.g.etypes) == 1, \
                'graph has multiple or no edge types; '\
                'please return a dict in negative sampler.'
            neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}

        # Take the dtype from the first tensor of the first (src, dst) pair
        first_pair = list(neg_srcdst.values())[0]
        dtype = F.dtype(first_pair[0])

        # Edge types without negatives get an empty (src, dst) pair
        neg_edges = {}
        for canonical_etype in self.g.canonical_etypes:
            empty_pair = (F.tensor([], dtype), F.tensor([], dtype))
            neg_edges[canonical_etype] = neg_srcdst.get(canonical_etype,
                                                        empty_pair)
        node_counts = {
            ntype: self.g.number_of_nodes(ntype)
            for ntype in self.g.ntypes
        }
        neg_pair_graph = heterograph(neg_edges, node_counts)

        pair_graph, neg_pair_graph = transform.compact_graphs(
            [pair_graph, neg_pair_graph])
        pair_graph.edata[EID] = induced_edges

        # Attach edge types and subsampling weights to the negative graph
        offset = 0
        for canonical_etype in neg_pair_graph.canonical_etypes:
            if isinstance(edge_type, dict):
                neg_pair_graph.edata[canonical_etype]['etype'] = \
                    edge_type[canonical_etype]
                neg_pair_graph.edata[canonical_etype]['sw'] = subsampling_w
            else:
                # Flat tensors: each edge type owns the next contiguous
                # slice, sized by its number of edges
                stop = offset + neg_pair_graph.number_of_edges(
                    canonical_etype)
                neg_pair_graph.edata['etype'] = edge_type[offset:stop]
                neg_pair_graph.edata['sw'] = subsampling_w[offset:stop]
                offset = stop

        seed_nodes = pair_graph.ndata[NID]

        exclude_eids = _find_exclude_eids(
            self.g,
            self.exclude,
            items,
            reverse_eid_map=self.reverse_eids,
            reverse_etype_map=self.reverse_etypes)

        blocks = self.block_sampler.sample_blocks(self.g_sampling,
                                                  seed_nodes,
                                                  exclude_eids=exclude_eids)

        input_nodes = blocks[0].srcdata[NID]

        return input_nodes, pair_graph, neg_pair_graph, blocks