Example #1
0
def _find_exclude_eids_with_reverse_id(g, eids, reverse_eid_map):
    if isinstance(eids, Mapping):
        eids = {g.to_canonical_etype(k): v for k, v in eids.items()}
        exclude_eids = {
            k: F.cat([v, F.gather_row(reverse_eid_map[k], v)], 0)
            for k, v in eids.items()
        }
    else:
        exclude_eids = F.cat([eids, F.gather_row(reverse_eid_map, eids)], 0)
    return exclude_eids
Example #2
0
def check_topk_score2(score_model, exclude_mode):
    num_entity = 40
    num_rels = 4

    src = F.arange(0, num_entity)
    dst1 = src + 1
    dst1[num_entity-1] = 0
    dst2 = src - 1
    dst2[0] = num_entity-1
    src = F.cat([src, src], dim=0)
    dst = F.cat([dst1, dst2], dim=0)
    src = F.cat([src, src, src, src], dim=0)
    dst = F.cat([dst, dst, dst, dst], dim=0)
    etype = F.cat([th.full((num_entity*2,), 0, dtype=th.long),
                    th.full((num_entity*2,), 1, dtype=th.long),
                    th.full((num_entity*2,), 2, dtype=th.long),
                    th.full((num_entity*2,), 3, dtype=th.long)],
                    dim=0)
    g = dgl.graph((src, dst))
    g.edata['tid'] = etype

    _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode)
Example #3
0
    def _merge_msg(self, msg_list):
        """Merge separated message to a big matrix

        Parameters
        ----------
        msg_list : list
            a list of KVStoreMsg

        Return
        ------
        tensor (mx.ndarray or torch.tensor)
            a merged data matrix
        """
        msg_list.sort(key=self._sort_func)
        return F.cat([msg.data for msg in msg_list], 0)
Example #4
0
    def pull_model(self, client, pos_g, neg_g):
        with th.no_grad():
            entity_id = F.cat(seq=[pos_g.ndata["id"], neg_g.ndata["id"]], dim=0)
            relation_id = pos_g.edata["id"]
            entity_id = F.tensor(np.unique(F.asnumpy(entity_id)))
            relation_id = F.tensor(np.unique(F.asnumpy(relation_id)))

            l2g = client.get_local2global()
            global_entity_id = l2g[entity_id]

            entity_data = client.pull(name="entity_emb", id_tensor=global_entity_id)
            relation_data = client.pull(name="relation_emb", id_tensor=relation_id)

            self.entity_emb.emb[entity_id] = entity_data
            self.relation_emb.emb[relation_id] = relation_data
Example #5
0
def segmented_knn_graph(x, k, segs):
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix.  The predecessors of each point are its k-nearest
    neighbors.

    The matrices are concatenated along the first axis, and are segmented by
    ``segs``.  Each block would be transformed into a separate graph.  The
    graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.
    k : int
        The number of neighbors
    segs : iterable of int
        Number of points of each point set.
        Must sum up to the number of rows in ``x``.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    n_total_points, _ = F.shape(x)
    offset = np.insert(np.cumsum(segs), 0, 0)

    h_list = F.split(x, segs, 0)
    dst = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False) +
        offset[i] for i, h_g in enumerate(h_list)
    ]
    dst = F.cat(dst, 0)
    src = F.arange(0, n_total_points).unsqueeze(1).expand(n_total_points, k)

    dst = F.reshape(dst, (-1, ))
    src = F.reshape(src, (-1, ))
    # !!! fix shape
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))),
        shape=(n_total_points, n_total_points))

    g = DGLGraph(adj, readonly=True)
    return g
Example #6
0
    def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False,
                          neg_deg_sample=False):
        """Calculate the negative score.

        Parameters
        ----------
        pos_g : DGLGraph
            Graph holding positive edges.
        neg_g : DGLGraph
            Graph holding negative edges.
        to_device : func
            Function to move data into device.
        gpu_id : int
            Which gpu to move data to.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: False
        neg_deg_sample : bool
            If True, we use the head and tail nodes of the positive edges to
            construct negative edges.
            Default: False

        Returns
        -------
        tensor
            The negative score
        """
        num_chunks = neg_g.num_chunks
        chunk_size = neg_g.chunk_size
        neg_sample_size = neg_g.neg_sample_size
        mask = F.ones((num_chunks, chunk_size * (neg_sample_size + chunk_size)),
                      dtype=F.float32, ctx=F.context(pos_g.ndata['emb']))
        if neg_g.neg_head:
            neg_head_ids = neg_g.ndata['id'][neg_g.head_nid]
            neg_head = self.entity_emb(neg_head_ids, gpu_id, trace)
            head_ids, tail_ids = pos_g.all_edges(order='eid')
            if to_device is not None and gpu_id >= 0:
                tail_ids = to_device(tail_ids, gpu_id)
            tail = pos_g.ndata['emb'][tail_ids]
            rel = pos_g.edata['emb']

            # When we train a batch, we could use the head nodes of the positive edges to
            # construct negative edges. We construct a negative edge between a positive head
            # node and every positive tail node.
            # When we construct negative edges like this, we know there is one positive
            # edge for a positive head node among the negative edges. We need to mask
            # them.
            if neg_deg_sample:
                head = pos_g.ndata['emb'][head_ids]
                head = head.reshape(num_chunks, chunk_size, -1)
                neg_head = neg_head.reshape(num_chunks, neg_sample_size, -1)
                neg_head = F.cat([head, neg_head], 1)
                neg_sample_size = chunk_size + neg_sample_size
                mask[:,0::(neg_sample_size + 1)] = 0
            neg_head = neg_head.reshape(num_chunks * neg_sample_size, -1)
            neg_head, tail = self.head_neg_prepare(pos_g.edata['id'], num_chunks, neg_head, tail, gpu_id, trace)
            neg_score = self.head_neg_score(neg_head, rel, tail,
                                            num_chunks, chunk_size, neg_sample_size)
        else:
            neg_tail_ids = neg_g.ndata['id'][neg_g.tail_nid]
            neg_tail = self.entity_emb(neg_tail_ids, gpu_id, trace)
            head_ids, tail_ids = pos_g.all_edges(order='eid')
            if to_device is not None and gpu_id >= 0:
                head_ids = to_device(head_ids, gpu_id)
            head = pos_g.ndata['emb'][head_ids]
            rel = pos_g.edata['emb']

            # This is negative edge construction similar to the above.
            if neg_deg_sample:
                tail = pos_g.ndata['emb'][tail_ids]
                tail = tail.reshape(num_chunks, chunk_size, -1)
                neg_tail = neg_tail.reshape(num_chunks, neg_sample_size, -1)
                neg_tail = F.cat([tail, neg_tail], 1)
                neg_sample_size = chunk_size + neg_sample_size
                mask[:,0::(neg_sample_size + 1)] = 0
            neg_tail = neg_tail.reshape(num_chunks * neg_sample_size, -1)
            head, neg_tail = self.tail_neg_prepare(pos_g.edata['id'], num_chunks, head, neg_tail, gpu_id, trace)
            neg_score = self.tail_neg_score(head, rel, neg_tail,
                                            num_chunks, chunk_size, neg_sample_size)

        if neg_deg_sample:
            neg_g.neg_sample_size = neg_sample_size
            mask = mask.reshape(num_chunks, chunk_size, neg_sample_size)
            return neg_score * mask
        else:
            return neg_score
Example #7
0
    def score(self, head, rel, tail, triplet_wise=False):
        head_emb = self.entity_emb(head)
        rel_emb = self.relation_emb(rel)
        tail_emb = self.entity_emb(tail)

        num_head = F.shape(head)[0]
        num_rel = F.shape(rel)[0]
        num_tail = F.shape(tail)[0]

        batch_size = self.batch_size
        score = []
        if triplet_wise:

            class FakeEdge(object):
                def __init__(self, head_emb, rel_emb, tail_emb):
                    self._hobj = {}
                    self._robj = {}
                    self._tobj = {}
                    self._hobj['emb'] = head_emb
                    self._robj['emb'] = rel_emb
                    self._tobj['emb'] = tail_emb

                @property
                def src(self):
                    return self._hobj

                @property
                def dst(self):
                    return self._tobj

                @property
                def data(self):
                    return self._robj

            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                sr_emb = rel_emb[i * batch_size : (i + 1) * batch_size \
                                                  if (i + 1) * batch_size < num_head \
                                                  else num_head]
                st_emb = tail_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                edata = FakeEdge(sh_emb, sr_emb, st_emb)
                score.append(
                    F.copy_to(
                        self.score_func.edge_func(edata)['score'], F.cpu()))
            score = F.cat(score, dim=0)
            return score
        else:
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                s_score = []
                for j in range((num_tail + batch_size - 1) // batch_size):
                    st_emb = tail_emb[j * batch_size : (j + 1) * batch_size \
                                                       if (j + 1) * batch_size < num_tail \
                                                       else num_tail]

                    s_score.append(
                        F.copy_to(
                            self.score_func.infer(sh_emb, rel_emb, st_emb),
                            F.cpu()))
                score.append(F.cat(s_score, dim=2))
            score = F.cat(score, dim=0)
            return F.reshape(score, (num_head * num_rel * num_tail, ))
Example #8
0
    def topK(self, head=None, tail=None, bcast=False, pair_ws=False, k=10):
        if head is None:
            head = F.arange(0, self.emb.shape[0])
        else:
            head = F.tensor(head)
        if tail is None:
            tail = F.arange(0, self.emb.shape[0])
        else:
            tail = F.tensor(tail)

        head_emb = self.emb[head]
        tail_emb = self.emb[tail]
        if pair_ws is True:
            result = []
            batch_size = self.batch_size
            # chunked cal score
            score = []
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                st_emb = tail_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                st_emb = F.copy_to(st_emb, self.device)
                score.append(F.copy_to(self.sim_func(sh_emb, st_emb, pw=True), F.cpu()))
            score = F.cat(score, dim=0)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            result.append((F.asnumpy(head[sidx]),
                           F.asnumpy(tail[sidx]),
                           F.asnumpy(score)))
        else:
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            batch_size = self.batch_size

            # chunked cal score
            score = []
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                            if (i + 1) * batch_size < num_head \
                                            else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                s_score = []
                for j in range((num_tail + batch_size - 1) // batch_size):
                    st_emb = tail_emb[j * batch_size : (j + 1) * batch_size \
                                                    if (j + 1) * batch_size < num_tail \
                                                    else num_tail]
                    st_emb = F.copy_to(st_emb, self.device)
                    s_score.append(F.copy_to(self.sim_func(sh_emb, st_emb), F.cpu()))
                score.append(F.cat(s_score, dim=1))
            score = F.cat(score, dim=0)

            if bcast is False:
                result = []
                idx = F.arange(0, num_head * num_tail)
                score = F.reshape(score, (num_head * num_tail, ))

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                sidx = sidx
                idx = idx[sidx]
                tail_idx = idx % num_tail
                idx = floor_divide(idx, num_tail)
                head_idx = idx % num_head

                result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))

            else: # bcast at head
                result = []
                for i in range(num_head):
                    i_score = score[i]

                    sidx = F.argsort(i_score, dim=0, descending=True)
                    idx = F.arange(0, num_tail)
                    i_idx = sidx[:k]
                    i_score = i_score[i_idx]
                    idx = idx[i_idx]

                    result.append((np.full((k,), F.asnumpy(head[i])),
                                  F.asnumpy(tail[idx]),
                                  F.asnumpy(i_score)))

        return result
Example #9
0
    def predict_neg_score(self,
                          pos_g,
                          neg_g,
                          to_device=None,
                          gpu_id=-1,
                          trace=False,
                          neg_deg_sample=False):
        num_chunks = neg_g.num_chunks
        chunk_size = neg_g.chunk_size
        neg_sample_size = neg_g.neg_sample_size
        mask = F.ones(
            (num_chunks, chunk_size * (neg_sample_size + chunk_size)),
            dtype=F.float32,
            ctx=F.context(pos_g.ndata['emb']))
        if neg_g.neg_head:
            neg_head_ids = neg_g.ndata['id'][neg_g.head_nid]
            neg_head = self.entity_emb(neg_head_ids, gpu_id, trace)
            head_ids, tail_ids = pos_g.all_edges(order='eid')
            if to_device is not None and gpu_id >= 0:
                tail_ids = to_device(tail_ids, gpu_id)
            tail = pos_g.ndata['emb'][tail_ids]
            rel = pos_g.edata['emb']

            # When we train a batch, we could use the head nodes of the positive edges to
            # construct negative edges. We construct a negative edge between a positive head
            # node and every positive tail node.
            # When we construct negative edges like this, we know there is one positive
            # edge for a positive head node among the negative edges. We need to mask
            # them.
            if neg_deg_sample:
                head = pos_g.ndata['emb'][head_ids]
                head = head.reshape(num_chunks, chunk_size, -1)
                neg_head = neg_head.reshape(num_chunks, neg_sample_size, -1)
                neg_head = F.cat([head, neg_head], 1)
                neg_sample_size = chunk_size + neg_sample_size
                mask[:, 0::(neg_sample_size + 1)] = 0
            neg_head = neg_head.reshape(num_chunks * neg_sample_size, -1)
            neg_head, tail = self.head_neg_prepare(pos_g.edata['id'],
                                                   num_chunks, neg_head, tail,
                                                   gpu_id, trace)
            neg_score = self.head_neg_score(neg_head, rel, tail, num_chunks,
                                            chunk_size, neg_sample_size)
        else:
            neg_tail_ids = neg_g.ndata['id'][neg_g.tail_nid]
            neg_tail = self.entity_emb(neg_tail_ids, gpu_id, trace)
            head_ids, tail_ids = pos_g.all_edges(order='eid')
            if to_device is not None and gpu_id >= 0:
                head_ids = to_device(head_ids, gpu_id)
            head = pos_g.ndata['emb'][head_ids]
            rel = pos_g.edata['emb']

            # This is negative edge construction similar to the above.
            if neg_deg_sample:
                tail = pos_g.ndata['emb'][tail_ids]
                tail = tail.reshape(num_chunks, chunk_size, -1)
                neg_tail = neg_tail.reshape(num_chunks, neg_sample_size, -1)
                neg_tail = F.cat([tail, neg_tail], 1)
                neg_sample_size = chunk_size + neg_sample_size
                mask[:, 0::(neg_sample_size + 1)] = 0
            neg_tail = neg_tail.reshape(num_chunks * neg_sample_size, -1)
            head, neg_tail = self.tail_neg_prepare(pos_g.edata['id'],
                                                   num_chunks, head, neg_tail,
                                                   gpu_id, trace)
            neg_score = self.tail_neg_score(head, rel, neg_tail, num_chunks,
                                            chunk_size, neg_sample_size)

        if neg_deg_sample:
            neg_g.neg_sample_size = neg_sample_size
            mask = mask.reshape(num_chunks, chunk_size, neg_sample_size)
            return neg_score * mask
        else:
            return neg_score
Example #10
0
    def process(self):
        with open(os.path.join(self.raw_dir, "gdb9.sdf.csv"), 'r') as f:
            target = f.read().split('\n')[1:-1]
            target = [[float(x) for x in line.split(',')[1:20]]
                      for line in target]
            target = F.tensor(target, dtype=F.data_type_dict['float32'])
            target = F.cat([target[:, 3:], target[:, :3]], dim=-1)
            target = (target * conversion.view(1, -1)).tolist()

        with open(os.path.join(self.raw_dir, "uncharacterized.txt"), 'r') as f:
            skip = [int(x.split()[0]) - 1 for x in f.read().split('\n')[9:-2]]

        suppl = Chem.SDMolSupplier(os.path.join(self.raw_dir, "gdb9.sdf"),
                                   removeHs=False,
                                   sanitize=False)

        Ns = []
        R = []
        Z = []
        H = []
        A = []
        NE = []
        E = []
        B = []
        T = []

        for i, mol in enumerate(suppl):
            if i in skip:
                continue

            N = mol.GetNumAtoms()

            pos = suppl.GetItemText(i).split('\n')[4:4 + N]
            pos = [[float(x) for x in line.split()[:3]] for line in pos]

            type_idx = []
            atomic_number = []
            aromatic = []
            hybr = []
            for atom in mol.GetAtoms():
                type_idx.append(self.types[atom.GetSymbol()])
                atomic_number.append(atom.GetAtomicNum())
                aromatic.append(1 if atom.GetIsAromatic() else 0)
                hybr.append(self.hybr_types[atom.GetHybridization()])

            row, edge_type = [], []
            for bond in mol.GetBonds():
                start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
                row += [start, end]
                edge_type += 2 * [self.bonds[bond.GetBondType()]]
            Ns.append(N)
            R += pos
            Z += atomic_number
            H += hybr
            A += aromatic
            NE.append(len(mol.GetBonds()))
            E += row
            B += edge_type
            T += target[i]
        self.N = Ns
        self.R = R
        self.Z = Z
        self.H = H
        self.A = A
        self.NE = NE
        self.E = E
        self.B = B
        self.T = T
        self.N_cumsum = np.concatenate([[0], np.cumsum(self.N)])
        self.NE_cumsum = np.concatenate([[0], np.cumsum(self.NE)])
Example #11
0
    def pull(self, name, id_tensor):
        """Pull message from KVServer.

        Parameters
        ----------
        name : str
            data name
        id_tensor : tensor (mx.ndarray or torch.tensor)
            a vector storing the ID list

        Returns
        -------
        tensor
            a data tensor with the same row size of id_tensor.
        """
        assert len(name) > 0, 'name cannot be empty.'
        assert F.ndim(id_tensor) == 1, 'ID must be a vector.'

        # partition data (we can move this part of code into C-api if needed)
        server_id = self._data_store[name + '-part-'][id_tensor]
        # sort index by server id
        sorted_id = np.argsort(F.asnumpy(server_id))
        # we need return data with original order of ID
        back_sorted_id = F.tensor(np.argsort(sorted_id))
        id_tensor = id_tensor[F.tensor(sorted_id)]
        server, count = np.unique(F.asnumpy(server_id), return_counts=True)
        # pull data from server by server order
        start = 0
        pull_count = 0
        local_data = {}
        for idx in range(len(server)):
            end = start + count[idx]
            if start == end:  # don't have any data in target server
                continue
            partial_id = id_tensor[start:end]

            if server[
                    idx] in self._local_server_id and self._close_shared_mem == False:
                if (name + '-g2l-' in self._has_data) == True:
                    local_id = self._data_store[name + '-g2l-'][partial_id]
                else:
                    local_id = partial_id
                local_data[server[idx]] = self._pull_handler(
                    name + '-data-', local_id, self._data_store)
            else:
                msg = KVStoreMsg(type=KVMsgType.PULL,
                                 rank=self._client_id,
                                 name=name,
                                 id=partial_id,
                                 data=None)
                _send_kv_msg(self._sender, msg, server[idx])
                pull_count += 1

            start += count[idx]

        msg_list = []
        for server_id, data in local_data.items():
            local_msg = KVStoreMsg(type=KVMsgType.PULL_BACK,
                                   rank=server_id,
                                   name=name,
                                   id=None,
                                   data=data)
            msg_list.append(local_msg)

        # wait message from server nodes
        for idx in range(pull_count):
            msg_list.append(_recv_kv_msg(self._receiver))

        # sort msg by server id
        msg_list.sort(key=self._takeId)
        data_tensor = F.cat(seq=[msg.data for msg in msg_list], dim=0)

        return data_tensor[
            back_sorted_id]  # return data with original index order