Example #1
def create_graph(num_part, dist_graph_path, hetero):
    if not hetero:
        g = dgl.rand_graph(10000, 42000)
        g.ndata['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
        g.edata['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
    else:
        from scipy import sparse as spsp
        num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
        etypes = [('n1', 'r1', 'n2'), ('n1', 'r2', 'n3'), ('n2', 'r3', 'n3')]
        edges = {}
        for etype in etypes:
            src_ntype, _, dst_ntype = etype
            arr = spsp.random(num_nodes[src_ntype],
                              num_nodes[dst_ntype],
                              density=0.001,
                              format='coo',
                              random_state=100)
            edges[etype] = (arr.row, arr.col)
        g = dgl.heterograph(edges, num_nodes)
        g.nodes['n1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_nodes('n1')), 1)
        g.edges['r1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_edges('r1')), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
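A minimal driver for the helper above might look like the following sketch; the partition count, the output directory, and the graph_name constant (which create_graph reads from module scope) are illustrative assumptions, not part of the original test:

import tempfile

graph_name = 'test_graph'  # assumed module-level constant used by create_graph

with tempfile.TemporaryDirectory() as tmpdir:
    # homogeneous case: 10000 nodes, 42000 random edges, 4 partitions
    create_graph(4, tmpdir, hetero=False)
    # heterogeneous case: three node types, three relation types
    create_graph(4, tmpdir, hetero=True)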
Example #2
def knn_graphE(x, k, istrain=False):
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix. The predecessors of each point are its k-nearest
    neighbors.

    If a 3D tensor is given instead, then each row would be transformed into
    a separate graph.  The graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.

        If 2D, each row of ``x`` corresponds to a node.

        If 3D, a k-NN graph is constructed for each 2D slice along the
        first axis, and the graphs are then unioned.
    k : int
        The number of nearest neighbors per node.
    istrain : bool, optional
        If True, on roughly half of the calls the neighbor set of each
        node is perturbed: the node keeps itself plus k - 1 neighbors
        drawn at random from its round(1.5 * k) nearest. Defaults to
        False.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    if istrain and np.random.rand() > 0.5:
        # perturb the neighbor set: sample k - 1 neighbors at random from
        # the round(1.5 * k) nearest, then re-add index 0 (the point
        # itself, whose distance is always 0)
        k_indices = F.argtopk(dist, round(1.5 * k), 2, descending=False)
        rand_k = np.random.permutation(round(1.5 * k) - 1)[0:k - 1] + 1
        rand_k = np.append(rand_k, 0)
        k_indices = k_indices[:, :, rand_k]
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)

    dst = F.copy_to(k_indices, F.cpu())

    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    per_sample_offset = F.reshape(
        F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1, ))
    src = F.reshape(src, (-1, ))
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g
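Assuming a PyTorch backend (so the F.* backend ops accept torch tensors) and the pairwise_squared_distance helper from the same module, a quick smoke test of knn_graphE might be:

import torch

x = torch.randn(100, 3)      # one point cloud: 100 points in 3-D
g = knn_graphE(x, k=5)       # deterministic k-NN graph
assert g.number_of_nodes() == 100
# each node gets exactly k predecessors (its k nearest, including itself)
assert g.number_of_edges() == 100 * 5

# with istrain=True, roughly half of the calls randomize the neighbor set
g_train = knn_graphE(x, k=5, istrain=True)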
Example #3
def generate_rand_graph(n, func_name):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    g = dgl.DGLGraph(arr, readonly=True)
    num_rels = 10
    entity_emb = F.uniform((g.number_of_nodes(), 10), F.float32, F.cpu(), 0, 1)
    if func_name == 'RotatE':
        entity_emb = F.uniform((g.number_of_nodes(), 20), F.float32, F.cpu(),
                               0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        rel_emb = F.uniform((num_rels, 10 * 10), F.float32, F.cpu(), 0, 1)
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    rel_ids = np.random.randint(0,
                                num_rels,
                                g.number_of_edges(),
                                dtype=np.int64)
    g.edata['id'] = F.tensor(rel_ids, F.int64)
    # TransR has an additional projection embedding
    if func_name == 'TransR':
        args = {'gpu': -1, 'lr': 0.1}
        args = dotdict(args)
        projection_emb = ExternalEmbedding(args, 10, 10 * 10, F.cpu())
        return g, entity_emb, rel_emb, (12.0, projection_emb, 10, 10)
    elif func_name == 'TransE':
        return g, entity_emb, rel_emb, (12.0)  # note: (12.0) is a scalar gamma, not a tuple
    elif func_name == 'RESCAL':
        return g, entity_emb, rel_emb, (10, 10)
    elif func_name == 'RotatE':
        return g, entity_emb, rel_emb, (12.0, 1.0)
    else:
        return g, entity_emb, rel_emb, None
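An illustrative call for two of the branches above, assuming the PyTorch backend; the hyper-parameter value in the fourth position follows the per-model returns (a hedged sketch, not part of the original tests):

g, entity_emb, rel_emb, gamma = generate_rand_graph(100, 'TransE')
assert entity_emb.shape == (100, 10) and gamma == 12.0

# TransR additionally needs dotdict and ExternalEmbedding to be importable
g, entity_emb, rel_emb, (gamma, proj_emb, d_ent, d_rel) = \
    generate_rand_graph(100, 'TransR')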
Example #4
def start_server(args):
    """Start kvstore service
    """
    server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

    my_server = KVServer(server_id=args.server_id,
                         server_namebook=server_namebook,
                         num_client=args.num_client)

    data = F.zeros((num_entries, args.dim_size), F.float32, F.cpu())
    g2l = F.zeros(num_entries * args.num_servers, F.int64, F.cpu())
    start = num_entries * my_server.get_machine_id()
    end = num_entries * (my_server.get_machine_id() + 1)
    g2l[start:end] = F.arange(0, num_entries)

    partition = np.arange(args.num_servers)
    partition = F.tensor(np.repeat(partition, num_entries))
    if my_server.get_id() % my_server.get_group_count() == 0:  # master server
        my_server.set_global2local(name='entity_embed', global2local=g2l)
        my_server.init_data(name='entity_embed', data_tensor=data)
        my_server.set_partition_book(name='entity_embed',
                                     partition_book=partition)
    else:
        my_server.set_global2local(name='entity_embed')
        my_server.init_data(name='entity_embed')
        my_server.set_partition_book(name='entity_embed')

    my_server.print()

    my_server.start()
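start_server reads an argparse-style namespace plus a num_entries value that the snippet assumes is defined at module scope; a hedged sketch of the expected fields (all values illustrative):

from types import SimpleNamespace

num_entries = 10000  # assumed module-level constant in the original script

args = SimpleNamespace(ip_config='ip_config.txt',
                       server_id=0,
                       num_client=1,
                       num_servers=2,
                       dim_size=128)
# start_server(args)  # blocks, serving pull/push requests until shut down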
Example #5
    def __init__(self, dataset, args):
        pickle_name = 'graph_all.pickle'
        if args.pickle_graph and os.path.exists(
                os.path.join(args.data_path, args.dataset, pickle_name)):
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      'rb') as graph_file:
                g = pickle.load(graph_file)
                print('Load pickled graph.')
        else:
            src = np.concatenate(
                (dataset.train[0], dataset.valid[0], dataset.test[0]))
            etype_id = np.concatenate(
                (dataset.train[1], dataset.valid[1], dataset.test[1]))
            dst = np.concatenate(
                (dataset.train[2], dataset.valid[2], dataset.test[2]))
            coo = sp.sparse.coo_matrix(
                (np.ones(len(src)), (src, dst)),
                shape=[dataset.n_entities, dataset.n_entities])
            g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
            g.ndata['id'] = F.arange(0, g.number_of_nodes())
            g.edata['id'] = F.tensor(etype_id, F.int64)
            if args.pickle_graph:
                with open(
                        os.path.join(args.data_path, args.dataset,
                                     pickle_name), 'wb') as graph_file:
                    pickle.dump(g, graph_file)
        self.g = g

        self.num_train = len(dataset.train[0])
        self.num_valid = len(dataset.valid[0])
        self.num_test = len(dataset.test[0])

        if args.eval_percent < 1:
            self.valid = np.random.randint(
                0,
                self.num_valid,
                size=(int(
                    self.num_valid * args.eval_percent), )) + self.num_train
        else:
            self.valid = np.arange(self.num_train,
                                   self.num_train + self.num_valid)
        print('|valid|:', len(self.valid))

        if args.eval_percent < 1:
            self.test = np.random.randint(
                0,
                self.num_test,
                size=(int(self.num_test * args.eval_percent), ))
            self.test += self.num_train + self.num_valid
        else:
            self.test = np.arange(self.num_train + self.num_valid,
                                  self.g.number_of_edges())
        print('|test|:', len(self.test))

        self.num_valid = len(self.valid)
        self.num_test = len(self.test)
Example #6
    def __init__(self, dataset, args):
        triples = dataset.train + dataset.valid + dataset.test
        pickle_name = "graph_all.pickle"
        if args.pickle_graph and os.path.exists(
                os.path.join(args.data_path, args.dataset, pickle_name)):
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      "rb") as graph_file:
                g = pickle.load(graph_file)
                print("Load pickled graph.")
        else:
            src = [t[0] for t in triples]
            etype_id = [t[1] for t in triples]
            dst = [t[2] for t in triples]
            coo = sp.sparse.coo_matrix(
                (np.ones(len(src)), (src, dst)),
                shape=[dataset.n_entities, dataset.n_entities])
            g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
            g.ndata["id"] = F.arange(0, g.number_of_nodes())
            g.edata["id"] = F.tensor(etype_id, F.int64)
            if args.pickle_graph:
                with open(
                        os.path.join(args.data_path, args.dataset,
                                     pickle_name), "wb") as graph_file:
                    pickle.dump(g, graph_file)
        self.g = g

        self.num_train = len(dataset.train)
        self.num_valid = len(dataset.valid)
        self.num_test = len(dataset.test)

        if args.eval_percent < 1:
            self.valid = (np.random.randint(
                0,
                self.num_valid,
                size=(int(self.num_valid * args.eval_percent), )) +
                          self.num_train)
        else:
            self.valid = np.arange(self.num_train,
                                   self.num_train + self.num_valid)
        print("|valid|:", len(self.valid))

        if args.eval_percent < 1:
            self.test = np.random.randint(
                0,
                self.num_test,
                size=(int(self.num_test * args.eval_percent), ),
            )
            self.test += self.num_train + self.num_valid
        else:
            self.test = np.arange(self.num_train + self.num_valid,
                                  self.g.number_of_edges())
        print("|test|:", len(self.test))

        self.num_valid = len(self.valid)
        self.num_test = len(self.test)
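Both constructors rely on the same flat layout: train, valid, and test triples are stored back to back in one edge array, so the evaluation indices are plain offsets. A minimal numpy illustration of that arithmetic:

import numpy as np

num_train, num_valid, num_test = 100, 10, 10
valid_ids = np.arange(num_train, num_train + num_valid)
test_ids = np.arange(num_train + num_valid,
                     num_train + num_valid + num_test)
assert valid_ids[0] == 100 and test_ids[-1] == 119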
Example #7
    def __init__(self,
                 train_data,
                 rank,
                 batch_size,
                 shuffle,
                 rel_weight,
                 neg_sample_size,
                 chunk_size,
                 exclude_positive=False,
                 replacement=False,
                 reset=True,
                 drop_last=True):
        # seed_edges are the indices of the triples assigned to this rank
        g = train_data.g
        seed_edges = train_data.edge_parts[rank]
        if seed_edges is None:
            seed_edges = F.arange(0, g.number_of_edges())
        assert batch_size % chunk_size == 0, 'batch size {} must be divisible by chunk size {} to enable chunk negative sampling'.format(
            batch_size, chunk_size)
        self.rels = g.edata['tid'][seed_edges]
        heads, tails = g.all_edges(order='eid')
        self.heads = heads[seed_edges]
        self.tails = tails[seed_edges]
        self.node_pool = g.nodes()
        self.reset = reset
        self.replacement = replacement
        # self.chunk_size = chunk_size
        # self.neg_sample_size = neg_sample_size
        # TODO mask all false negative rels
        self.exclude_positive = exclude_positive
        self.drop_last = drop_last
        # may be replaced by a user-provided relation weight vector
        self.rel_weight = th.ones(
            len(self.rels),
            dtype=th.float32) if rel_weight is None else rel_weight[seed_edges]
        # shuffle data
        if shuffle:
            # MARK - whether to shuffle data or shuffle indices only?
            self.node_pool = self.node_pool[th.randperm(len(self.node_pool))]
            idx = th.randperm(len(self.rels))
            self.rels = self.rels[idx]
            self.heads = self.heads[idx]
            self.tails = self.tails[idx]
            # the relation weights must be shuffled with the same permutation to stay aligned
            self.rel_weight = self.rel_weight[idx]

        self.batch_size = batch_size
        self.pool_size = self.batch_size // chunk_size * neg_sample_size
        self.iter_idx = 0
        self.pool_idx = 0
        self.step = 0
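The divisibility assert above exists because negatives are shared per chunk: each batch of batch_size positive triples is split into batch_size // chunk_size chunks, and each chunk draws neg_sample_size negatives from the pool. The arithmetic in isolation, with illustrative sizes:

batch_size, chunk_size, neg_sample_size = 1024, 32, 8
assert batch_size % chunk_size == 0
pool_size = batch_size // chunk_size * neg_sample_size  # 256 negatives per batch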
Example #8
def generate_rand_graph(n):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    g = dgl.DGLGraph(arr, readonly=True)
    num_rels = 10
    entity_emb = F.uniform((g.number_of_nodes(), 10), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), 0, 1)
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    rel_ids = np.random.randint(0,
                                num_rels,
                                g.number_of_edges(),
                                dtype=np.int64)
    g.edata['id'] = F.tensor(rel_ids, F.int64)
    return g, entity_emb, rel_emb
Example #9
def ConstructGraph(edges, n_entities, args):
    pickle_name = 'graph_train.pickle'
    if args.pickle_graph and os.path.exists(os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name), 'rb') as graph_file:
            g = pickle.load(graph_file)
            print('Load pickled graph.')
    else:
        src, etype_id, dst = edges
        coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)), shape=[n_entities, n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata['id'] = F.arange(0, g.number_of_nodes())
        g.edata['id'] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name), 'wb') as graph_file:
                pickle.dump(g, graph_file)
    return g
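A hypothetical invocation of ConstructGraph; the args fields and the tiny triple arrays are placeholders for illustration only:

from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(data_path='./data', dataset='FB15k', pickle_graph=False)
src = np.array([0, 1, 2])
etype_id = np.array([0, 1, 0])
dst = np.array([1, 2, 0])
g = ConstructGraph((src, etype_id, dst), n_entities=3, args=args)
assert g.number_of_edges() == 3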
Example #10
def segmented_knn_graph(x, k, segs):
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix.  The predecessors of each point are its k-nearest
    neighbors.

    The matrices are concatenated along the first axis, and are segmented by
    ``segs``.  Each block would be transformed into a separate graph.  The
    graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.
    k : int
        The number of neighbors
    segs : iterable of int
        Number of points of each point set.
        Must sum up to the number of rows in ``x``.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    n_total_points, _ = F.shape(x)
    offset = np.insert(np.cumsum(segs), 0, 0)

    h_list = F.split(x, segs, 0)
    dst = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False) +
        offset[i] for i, h_g in enumerate(h_list)
    ]
    dst = F.cat(dst, 0)
    src = F.arange(0, n_total_points).unsqueeze(1).expand(n_total_points, k)

    dst = F.reshape(dst, (-1, ))
    src = F.reshape(src, (-1, ))
    # pass the shape explicitly so that trailing isolated nodes are kept
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))),
        shape=(n_total_points, n_total_points))

    g = DGLGraph(adj, readonly=True)
    return g
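Again assuming a PyTorch backend, a hedged usage sketch: two point clouds of 30 and 50 points stacked along the first axis, with no edges crossing the segment boundary:

import torch

x = torch.randn(80, 3)
g = segmented_knn_graph(x, k=4, segs=[30, 50])
assert g.number_of_nodes() == 80
assert g.number_of_edges() == 80 * 4  # k predecessors per node, within its segment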
Example #11
def check_topk_score2(score_model, exclude_mode):
    num_entity = 40
    num_rels = 4

    src = F.arange(0, num_entity)
    dst1 = src + 1
    dst1[num_entity-1] = 0
    dst2 = src - 1
    dst2[0] = num_entity-1
    src = F.cat([src, src], dim=0)
    dst = F.cat([dst1, dst2], dim=0)
    src = F.cat([src, src, src, src], dim=0)
    dst = F.cat([dst, dst, dst, dst], dim=0)
    etype = F.cat([th.full((num_entity*2,), 0, dtype=th.long),
                    th.full((num_entity*2,), 1, dtype=th.long),
                    th.full((num_entity*2,), 2, dtype=th.long),
                    th.full((num_entity*2,), 3, dtype=th.long)],
                    dim=0)
    g = dgl.graph((src, dst))
    g.edata['tid'] = etype

    _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode)
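The edges built above form a bidirectional ring, duplicated once per relation type: node i points to (i + 1) % num_entity and (i - 1) % num_entity. The same wrap-around in plain numpy:

import numpy as np

n = 40
src = np.arange(n)
dst1 = (src + 1) % n   # successor ring; matches dst1[n-1] = 0 above
dst2 = (src - 1) % n   # predecessor ring; matches dst2[0] = n-1 above
assert dst1[n - 1] == 0 and dst2[0] == n - 1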
Example #12
def ConstructGraph(edges, n_entities, i, args):
    pickle_name = "graph_train_{}.pickle".format(i)
    if args.pickle_graph and os.path.exists(
            os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name),
                  "rb") as graph_file:
            g = pickle.load(graph_file)
            print("Load pickled graph.")
    else:
        src = [t[0] for t in edges]
        etype_id = [t[1] for t in edges]
        dst = [t[2] for t in edges]
        coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)),
                                   shape=[n_entities, n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata["id"] = F.arange(0, g.number_of_nodes())
        g.edata["id"] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      "wb") as graph_file:
                pickle.dump(g, graph_file)
    return g
Example #13
def run_topk_emb2(sfunc, sim_func, emb_model):
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80

    with tempfile.TemporaryDirectory() as tmpdirname:
        emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)
        create_emb_file(Path(tmpdirname), 'entity.npy', emb.numpy())
        create_emb_file(Path(tmpdirname), 'relation.npy', emb.numpy())

        emb_model.load(Path(tmpdirname))

    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head+num_tail)
    result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc, pair_ws=True)
    scores = []
    head_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        j = i
        hemb = F.take(emb, head[i], 0)
        temb = F.take(emb, tail[j], 0)

        score = sim_func(hemb, temb)
        scores.append(F.asnumpy(score))
        head_ids.append(F.asnumpy(head[i]))
        tail_ids.append(F.asnumpy(tail[j]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    print('pass pairwise')

    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head+num_tail)
    result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc)
    assert len(result1) == 1
    scores = []
    head_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        for j in range(tail.shape[0]):
            hemb = F.take(emb, head[i], 0)
            temb = F.take(emb, tail[j], 0)

            score = sim_func(hemb, temb)
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(head[i]))
            tail_ids.append(F.asnumpy(tail[j]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    emb_ids = F.arange(0, num_emb)
    result1 = emb_model.embed_sim(emb_ids, emb_ids, 'entity', sfunc=sfunc, bcast=True)
    result2 = emb_model.embed_sim(embed_type='entity', sfunc=sfunc, bcast=True)
    assert len(result1) == emb_ids.shape[0]
    assert len(result2) == emb_ids.shape[0]

    for i in range(emb_ids.shape[0]):
        scores = []
        head_ids = []
        tail_ids = []
        for j in range(emb_ids.shape[0]):
            hemb = F.take(emb, emb_ids[i], 0)
            temb = F.take(emb, emb_ids[j], 0)

            score = sim_func(hemb, temb)
            score = F.asnumpy(score)
            scores.append(score)
            tail_ids.append(F.asnumpy(emb_ids[j]))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = np.full((10,), F.asnumpy(emb_ids[i]))
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_tail, r1_score = result1[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        r2_head, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
    print('pass all')
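The verification pattern repeated throughout this test is the standard numpy descending top-k idiom: argsort ascending, reverse, take the first k. In isolation:

import numpy as np

scores = np.random.rand(40)
idx = np.argsort(scores)[::-1][:10]      # indices of the 10 largest scores
top_scores = scores[idx]
assert (np.diff(top_scores) <= 0).all()  # monotonically non-increasing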
Example #14
    def topK(self, head=None, rel=None, tail=None, exec_mode='all', k=10):
        if head is None:
            head = F.arange(0, self.model.num_entity)
        else:
            head = F.tensor(head)
        if rel is None:
            rel = F.arange(0, self.model.num_rel)
        else:
            rel = F.tensor(rel)
        if tail is None:
            tail = F.arange(0, self.model.num_entity)
        else:
            tail = F.tensor(tail)

        num_head = F.shape(head)[0]
        num_rel = F.shape(rel)[0]
        num_tail = F.shape(tail)[0]

        if exec_mode == 'triplet_wise':
            result = []
            assert num_head == num_rel, \
                'For triplet-wise execution mode, the head, relation and tail lists must have the same length'
            assert num_head == num_tail, \
                'For triplet-wise execution mode, the head, relation and tail lists must have the same length'

            raw_score = self.model.score(head, rel, tail, triplet_wise=True)
            score = self.score_func(raw_score)
            idx = F.arange(0, num_head)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]

            result.append((F.asnumpy(head[idx]),
                           F.asnumpy(rel[idx]),
                           F.asnumpy(tail[idx]),
                           F.asnumpy(score)))
        elif exec_mode == 'all':
            result = []
            raw_score = self.model.score(head, rel, tail)
            score = self.score_func(raw_score)
            idx = F.arange(0, num_head * num_rel * num_tail)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]

            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            rel_idx = idx % num_rel
            idx = floor_divide(idx, num_rel)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(rel[rel_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
        elif exec_mode == 'batch_head':
            result = []
            for i in range(num_head):
                raw_score = self.model.score(F.unsqueeze(head[i], 0), rel, tail)
                score = self.score_func(raw_score)
                idx = F.arange(0, num_rel * num_tail)

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                idx = idx[sidx]
                tail_idx = idx % num_tail
                idx = floor_divide(idx, num_tail)
                rel_idx = idx % num_rel

                result.append((np.full((k,), F.asnumpy(head[i])),
                               F.asnumpy(rel[rel_idx]),
                               F.asnumpy(tail[tail_idx]),
                               F.asnumpy(score)))
        elif exec_mode == 'batch_rel':
            result = []
            for i in range(num_rel):
                raw_score = self.model.score(head, F.unsqueeze(rel[i], 0), tail)
                score = self.score_func(raw_score)
                idx = F.arange(0, num_head * num_tail)

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                idx = idx[sidx]
                tail_idx = idx % num_tail
                idx = floor_divide(idx, num_tail)
                head_idx = idx % num_head

                result.append((F.asnumpy(head[head_idx]),
                               np.full((k,), F.asnumpy(rel[i])),
                               F.asnumpy(tail[tail_idx]),
                               F.asnumpy(score)))
        elif exec_mode == 'batch_tail':
            result = []
            for i in range(num_tail):
                raw_score = self.model.score(head, rel, F.unsqueeze(tail[i], 0))
                score = self.score_func(raw_score)
                idx = F.arange(0, num_head * num_rel)

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                idx = idx[sidx]
                rel_idx = idx % num_rel
                idx = floor_divide(idx, num_rel)
                head_idx = idx % num_head
                result.append((F.asnumpy(head[head_idx]),
                               F.asnumpy(rel[rel_idx]),
                               np.full((k,), F.asnumpy(tail[i])),
                               F.asnumpy(score)))
        else:
            assert False, 'unknown execution mode {}'.format(exec_mode)

        return result
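The modulo/floor-divide cascade in the branches above simply unravels a flat index over a (num_head, num_rel, num_tail) grid; it is equivalent to numpy's unravel_index:

import numpy as np

num_head, num_rel, num_tail = 3, 4, 5
flat = np.arange(num_head * num_rel * num_tail)
tail_idx = flat % num_tail
rel_idx = (flat // num_tail) % num_rel
head_idx = (flat // num_tail // num_rel) % num_head
h, r, t = np.unravel_index(flat, (num_head, num_rel, num_tail))
assert (h == head_idx).all() and (r == rel_idx).all() and (t == tail_idx).all()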
Example #15
    def topK(self, head=None, tail=None, bcast=False, pair_ws=False, k=10):
        if head is None:
            head = F.arange(0, self.emb.shape[0])
        else:
            head = F.tensor(head)
        if tail is None:
            tail = F.arange(0, self.emb.shape[0])
        else:
            tail = F.tensor(tail)

        head_emb = self.emb[head]
        tail_emb = self.emb[tail]
        if pair_ws is True:
            result = []
            batch_size = self.batch_size
            # compute the scores in chunks to bound device memory
            score = []
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                st_emb = tail_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                st_emb = F.copy_to(st_emb, self.device)
                score.append(F.copy_to(self.sim_func(sh_emb, st_emb, pw=True), F.cpu()))
            score = F.cat(score, dim=0)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            result.append((F.asnumpy(head[sidx]),
                           F.asnumpy(tail[sidx]),
                           F.asnumpy(score)))
        else:
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            batch_size = self.batch_size

            # compute the scores in chunks over both head and tail blocks
            score = []
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                            if (i + 1) * batch_size < num_head \
                                            else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                s_score = []
                for j in range((num_tail + batch_size - 1) // batch_size):
                    st_emb = tail_emb[j * batch_size : (j + 1) * batch_size \
                                                    if (j + 1) * batch_size < num_tail \
                                                    else num_tail]
                    st_emb = F.copy_to(st_emb, self.device)
                    s_score.append(F.copy_to(self.sim_func(sh_emb, st_emb), F.cpu()))
                score.append(F.cat(s_score, dim=1))
            score = F.cat(score, dim=0)

            if bcast is False:
                result = []
                idx = F.arange(0, num_head * num_tail)
                score = F.reshape(score, (num_head * num_tail, ))

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                idx = idx[sidx]
                tail_idx = idx % num_tail
                idx = floor_divide(idx, num_tail)
                head_idx = idx % num_head

                result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))

            else: # bcast at head
                result = []
                for i in range(num_head):
                    i_score = score[i]

                    sidx = F.argsort(i_score, dim=0, descending=True)
                    idx = F.arange(0, num_tail)
                    i_idx = sidx[:k]
                    i_score = i_score[i_idx]
                    idx = idx[i_idx]

                    result.append((np.full((k,), F.asnumpy(head[i])),
                                  F.asnumpy(tail[idx]),
                                  F.asnumpy(i_score)))

        return result
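A side note on the chunk bounds: Python slicing already clamps the stop index to the array length, so the conditional upper bounds in the loops above can be written as plain slices. A minimal equivalent of the chunking, with illustrative sizes:

import torch

emb = torch.randn(100, 16)
batch_size = 32
chunks = [emb[i:i + batch_size] for i in range(0, len(emb), batch_size)]
assert sum(c.shape[0] for c in chunks) == 100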
Example #16
def dist_train_test(args,
                    model,
                    train_sampler,
                    entity_pb,
                    relation_pb,
                    l2g,
                    rank=0,
                    rel_parts=None,
                    cross_rels=None,
                    barrier=None):
    if args.num_proc > 1:
        th.set_num_threads(args.num_thread)

    client = connect_to_kvstore(args, entity_pb, relation_pb, l2g)
    client.barrier()
    train_time_start = time.time()
    train(args, model, train_sampler, None, rank, rel_parts, cross_rels,
          barrier, client)
    client.barrier()
    print('Total train time {:.3f} seconds'.format(time.time() -
                                                   train_time_start))

    model = None

    if client.get_id() % args.num_client == 0:  # pull full model from kvstore

        args.num_test_proc = args.num_client
        dataset_full = get_dataset(args.data_path, args.dataset, args.format)

        print('Full data n_entities: ' + str(dataset_full.n_entities))
        print("Full data n_relations: " + str(dataset_full.n_relations))

        model_test = load_model(None, args, dataset_full.n_entities,
                                dataset_full.n_relations)
        eval_dataset = EvalDataset(dataset_full, args)

        if args.test:
            model_test.share_memory()

        if args.neg_sample_size_test < 0:
            args.neg_sample_size_test = dataset_full.n_entities
        args.eval_filter = not args.no_eval_filter
        if args.neg_deg_sample_eval:
            assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

        if args.neg_chunk_size_valid < 0:
            args.neg_chunk_size_valid = args.neg_sample_size_valid
        if args.neg_chunk_size_test < 0:
            args.neg_chunk_size_test = args.neg_sample_size_test

        print("Pull relation_emb ...")
        relation_id = F.arange(0, model_test.n_relations)
        relation_data = client.pull(name='relation_emb', id_tensor=relation_id)
        model_test.relation_emb.emb[relation_id] = relation_data

        print("Pull entity_emb ... ")
        # pull the entity embeddings from the kvstore in ~100 chunks
        start = 0
        percent = 0
        entity_id = F.arange(0, model_test.n_entities)
        count = int(model_test.n_entities / 100)
        end = start + count
        while True:
            print("Pull %d / 100 ..." % percent)
            if end >= model_test.n_entities:
                # clamp so the final chunk includes the last entity
                end = model_test.n_entities
            tmp_id = entity_id[start:end]
            entity_data = client.pull(name='entity_emb', id_tensor=tmp_id)
            model_test.entity_emb.emb[tmp_id] = entity_data
            if end == model_test.n_entities:
                break
            start = end
            end += count
            percent += 1

        if args.save_emb is not None:
            if not os.path.exists(args.save_emb):
                os.mkdir(args.save_emb)
            model_test.save_emb(args.save_emb, args.dataset)

        if args.test:
            args.num_thread = 1
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                test_sampler_head = eval_dataset.create_sampler(
                    'test',
                    args.batch_size_eval,
                    args.neg_sample_size_test,
                    args.neg_chunk_size_test,
                    args.eval_filter,
                    mode='chunk-head',
                    num_workers=args.num_thread,
                    rank=i,
                    ranks=args.num_test_proc)
                test_sampler_tail = eval_dataset.create_sampler(
                    'test',
                    args.batch_size_eval,
                    args.neg_sample_size_test,
                    args.neg_chunk_size_test,
                    args.eval_filter,
                    mode='chunk-tail',
                    num_workers=args.num_thread,
                    rank=i,
                    ranks=args.num_test_proc)
                test_sampler_heads.append(test_sampler_head)
                test_sampler_tails.append(test_sampler_tail)

            eval_dataset = None
            dataset_full = None

            print("Run test, test processes: %d" % args.num_test_proc)

            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp,
                                  args=(args, model_test, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ], i, 'Test', queue))
                procs.append(proc)
                proc.start()

            total_metrics = {}
            metrics = {}
            logs = []
            for i in range(args.num_test_proc):
                log = queue.get()
                logs = logs + log

            for metric in logs[0].keys():
                metrics[metric] = sum([log[metric]
                                       for log in logs]) / len(logs)
            for k, v in metrics.items():
                print('Test average {} at [{}/{}]: {}'.format(
                    k, args.step, args.max_step, v))

            for proc in procs:
                proc.join()

        if client.get_id() == 0:
            client.shut_down()
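The chunked entity pull in the middle of this function can also be expressed as a plain range loop; a hedged sketch of the same idea (client.pull with name/id_tensor mirrors the calls above, everything else is illustrative):

def pull_in_parts(client, n_entities, parts=100):
    entity_id = F.arange(0, n_entities)
    count = max(1, n_entities // parts)
    chunks = []
    for start in range(0, n_entities, count):
        tmp_id = entity_id[start:start + count]  # slicing clamps at the end
        chunks.append(client.pull(name='entity_emb', id_tensor=tmp_id))
    return chunks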
Example #17
def _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode):
    hidden_dim = 32
    num_entity = 40
    num_rels = 4
    with tempfile.TemporaryDirectory() as tmpdirname:
        entity_emb, rel_emb = generate_rand_emb(score_model.model_name, num_entity, num_rels, hidden_dim, 'none')
        create_emb_file(Path(tmpdirname), 'entity.npy', entity_emb.numpy())
        create_emb_file(Path(tmpdirname), 'relation.npy', rel_emb.numpy())

        score_model.load(Path(tmpdirname))
        score_model.attach_graph(g)
        score_func = score_model._score_func

    head = F.arange(0, num_entity // 2)
    rel = F.arange(0, num_rels)
    tail = F.arange(num_entity // 2, num_entity)

    # exec_mode == 'triplet_wise'
    tw_rel = np.random.randint(0, num_rels, num_entity // 2)
    tw_rel = F.tensor(tw_rel)
    result1 = score_model.link_predict(head, tw_rel, tail, exec_mode='triplet_wise', exclude_mode=exclude_mode, batch_size=16)
    assert len(result1) == 1
    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        hemb = F.take(entity_emb, head[i], 0)
        remb = F.take(rel_emb, tw_rel[i], 0)
        temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0)
        edge = FakeEdge(hemb, temb, remb)
        score = F.asnumpy(score_func.edge_func(edge)['score'])
        scores.append(score)
        head_ids.append(F.asnumpy(head[i]))
        rel_ids.append(F.asnumpy(tw_rel[i]))
        tail_ids.append(F.asnumpy(tail[i]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    if exclude_mode is None or exclude_mode == 'mask':
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]
        if exclude_mode == 'mask':
            mask = np.zeros((10,))
            for i in range(10):
                if (head_ids[i] + 1) % num_entity == tail_ids[i] or \
                    (head_ids[i] - 1) % num_entity == tail_ids[i]:
                    mask[i] = 1
    else:
        c_head_idx = []
        c_rel_idx = []
        c_tail_idx = []
        c_score_topk = []
        cur_idx = 0
        while len(c_head_idx) < 10:
            c_idx = idx[cur_idx]
            cur_idx += 1
            if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \
                (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]:
                continue
            c_head_idx.append(head_ids[c_idx])
            c_tail_idx.append(tail_ids[c_idx])
            c_rel_idx.append(rel_ids[c_idx])
            c_score_topk.append(scores[c_idx])
        head_ids = F.tensor(c_head_idx)
        rel_ids = F.tensor(c_rel_idx)
        tail_ids = F.tensor(c_tail_idx)
        score_topk = F.tensor(c_score_topk)

    r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[0]
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    if exclude_mode == 'mask':
        np.testing.assert_allclose(r1_mask, mask)
    else:
        assert r1_mask is None

    # exec_mode==all
    result1 = score_model.link_predict(head, rel, tail, topk=20, exclude_mode=exclude_mode, batch_size=16)
    result2 = score_model.link_predict(head=head, tail=tail, topk=20, exclude_mode=exclude_mode, batch_size=16)
    assert len(result1) == 1
    assert len(result2) == 1

    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    if exclude_mode is None or exclude_mode == 'mask':
        idx = idx[:20]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]
        if exclude_mode == 'mask':
            mask = np.zeros((20,))
            for i in range(20):
                if (head_ids[i] + 1) % num_entity == tail_ids[i] or \
                    (head_ids[i] - 1) % num_entity == tail_ids[i]:
                    mask[i] = 1
    else:
        c_head_idx = []
        c_rel_idx = []
        c_tail_idx = []
        c_score_topk = []
        cur_idx = 0
        while len(c_head_idx) < 20:
            c_idx = idx[cur_idx]
            cur_idx += 1
            if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \
                (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]:
                continue
            c_head_idx.append(head_ids[c_idx])
            c_tail_idx.append(tail_ids[c_idx])
            c_rel_idx.append(rel_ids[c_idx])
            c_score_topk.append(scores[c_idx])
        head_ids = F.tensor(c_head_idx)
        rel_ids = F.tensor(c_rel_idx)
        tail_ids = F.tensor(c_tail_idx)
        score_topk = F.tensor(c_score_topk)

    r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[0]
    r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r2_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r2_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    np.testing.assert_allclose(r2_tail, tail_ids)
    if exclude_mode == 'mask':
        np.testing.assert_allclose(r1_mask, mask)
        np.testing.assert_allclose(r2_mask, mask)
    else:
        assert r1_mask is None
        assert r2_mask is None

    result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_rel', exclude_mode=exclude_mode, batch_size=16)
    result2 = score_model.link_predict(head=head, tail=tail, exec_mode='batch_rel', exclude_mode=exclude_mode, batch_size=16)
    assert len(result1) == num_rels
    assert len(result2) == num_rels
    for j in range(rel.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        if exclude_mode is None or exclude_mode == 'mask':
            idx = idx[:10]
            head_ids = head_ids[idx]
            rel_ids = rel_ids[idx]
            tail_ids = tail_ids[idx]
            score_topk = scores[idx]
            if exclude_mode == 'mask':
                mask = np.full((10,), False)
                for i in range(10):
                    if (head_ids[i] + 1) % num_entity == tail_ids[i] or \
                        (head_ids[i] - 1) % num_entity == tail_ids[i]:
                        mask[i] = True
        else:
            c_head_idx = []
            c_rel_idx = []
            c_tail_idx = []
            c_score_topk = []
            cur_idx = 0
            while len(c_head_idx) < 10:
                c_idx = idx[cur_idx]
                cur_idx += 1
                if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \
                    (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]:
                    continue
                c_head_idx.append(head_ids[c_idx])
                c_tail_idx.append(tail_ids[c_idx])
                c_rel_idx.append(rel_ids[c_idx])
                c_score_topk.append(scores[c_idx])
            head_ids = F.tensor(c_head_idx)
            rel_ids = F.tensor(c_rel_idx)
            tail_ids = F.tensor(c_tail_idx)
            score_topk = F.tensor(c_score_topk)

        r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[j]
        r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[j]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
        if exclude_mode == 'mask':
            np.testing.assert_allclose(r1_mask, mask)
            np.testing.assert_allclose(r2_mask, mask)
        else:
            assert r1_mask is None
            assert r2_mask is None


    head = F.arange(0, num_entity)
    rel = F.arange(0, num_rels)
    tail = F.arange(0, num_entity)
    result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_head', exclude_mode=exclude_mode, batch_size=16)
    result2 = score_model.link_predict(exec_mode='batch_head', exclude_mode=exclude_mode, batch_size=16)
    assert len(result1) == num_entity
    assert len(result2) == num_entity

    for i in range(head.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        if exclude_mode is None or exclude_mode == 'mask':
            idx = idx[:10]
            head_ids = head_ids[idx]
            rel_ids = rel_ids[idx]
            tail_ids = tail_ids[idx]
            score_topk = scores[idx]
            if exclude_mode == 'mask':
                mask = np.full((10,), False)
                for l in range(10):
                    if (head_ids[l] + 1) % num_entity == tail_ids[l] or \
                        (head_ids[l] - 1) % num_entity == tail_ids[l]:
                        mask[l] = True
        else:
            c_head_idx = []
            c_rel_idx = []
            c_tail_idx = []
            c_score_topk = []
            cur_idx = 0
            while len(c_head_idx) < 10:
                c_idx = idx[cur_idx]
                cur_idx += 1
                if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \
                    (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]:
                    continue
                c_head_idx.append(head_ids[c_idx])
                c_tail_idx.append(tail_ids[c_idx])
                c_rel_idx.append(rel_ids[c_idx])
                c_score_topk.append(scores[c_idx])
            head_ids = F.tensor(c_head_idx)
            rel_ids = F.tensor(c_rel_idx)
            tail_ids = F.tensor(c_tail_idx)
            score_topk = F.tensor(c_score_topk)

        r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[i]
        r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[i]
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        if exclude_mode == 'mask':
            np.testing.assert_allclose(r1_mask, mask)
            np.testing.assert_allclose(r2_mask, mask)
        else:
            assert r1_mask is None
            assert r2_mask is None

    result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_tail', exclude_mode=exclude_mode)
    result2 = score_model.link_predict(exec_mode='batch_tail', exclude_mode=exclude_mode)
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for k in range(tail.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for j in range(rel.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        if exclude_mode is None or exclude_mode == 'mask':
            idx = idx[:10]
            head_ids = head_ids[idx]
            rel_ids = rel_ids[idx]
            tail_ids = tail_ids[idx]
            score_topk = scores[idx]
            if exclude_mode == 'mask':
                mask = np.full((10,), False)
                for l in range(10):
                    if (head_ids[l] + 1) % num_entity == tail_ids[l] or \
                        (head_ids[l] - 1) % num_entity == tail_ids[l]:
                        mask[l] = True
        else:
            c_head_idx = []
            c_rel_idx = []
            c_tail_idx = []
            c_score_topk = []
            cur_idx = 0
            while len(c_head_idx) < 10:
                c_idx = idx[cur_idx]
                cur_idx += 1
                if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \
                    (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]:
                    continue
                c_head_idx.append(head_ids[c_idx])
                c_tail_idx.append(tail_ids[c_idx])
                c_rel_idx.append(rel_ids[c_idx])
                c_score_topk.append(scores[c_idx])
            head_ids = F.tensor(c_head_idx)
            rel_ids = F.tensor(c_rel_idx)
            tail_ids = F.tensor(c_tail_idx)
            score_topk = F.tensor(c_score_topk)

        r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[k]
        r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[k]
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        if exclude_mode == 'mask':
            np.testing.assert_allclose(r1_mask, mask)
            np.testing.assert_allclose(r2_mask, mask)
        else:
            assert r1_mask is None
            assert r2_mask is None
Example #18
def run_topk_emb(sfunc, sim_func, create_emb_sim=create_kge_emb_sim):
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80

    emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)
    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head+num_tail)
    sim_infer = create_emb_sim(emb, sfunc)

    result1 = sim_infer.topK(head, tail, pair_ws=True)
    scores = []
    head_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        j = i
        hemb = F.take(emb, head[i], 0)
        temb = F.take(emb, tail[j], 0)

        score = sim_func(hemb, temb)
        scores.append(F.asnumpy(score))
        head_ids.append(F.asnumpy(head[i]))
        tail_ids.append(F.asnumpy(tail[j]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    print('pass pairwise')

    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head+num_tail)
    result1 = sim_infer.topK(head, tail)
    assert len(result1) == 1
    scores = []
    head_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        for j in range(tail.shape[0]):
            hemb = F.take(emb, head[i], 0)
            temb = F.take(emb, tail[j], 0)

            score = sim_func(hemb, temb)
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(head[i]))
            tail_ids.append(F.asnumpy(tail[j]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    emb_ids = F.arange(0, num_emb)
    result1 = sim_infer.topK(emb_ids, emb_ids, bcast=True)
    result2 = sim_infer.topK(bcast=True)
    assert len(result1) == emb_ids.shape[0]
    assert len(result2) == emb_ids.shape[0]

    for i in range(emb_ids.shape[0]):
        scores = []
        head_ids = []
        tail_ids = []
        for j in range(emb_ids.shape[0]):
            hemb = F.take(emb, emb_ids[i], 0)
            temb = F.take(emb, emb_ids[j], 0)

            score = sim_func(hemb, temb)
            score = F.asnumpy(score)
            scores.append(score)
            tail_ids.append(F.asnumpy(emb_ids[j]))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = np.full((10,), F.asnumpy(emb_ids[i]))
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_tail, r1_score = result1[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        r2_head, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
    print('pass all')
Example #19
def check_topk_score(model_name):
    hidden_dim = 32
    gamma = 12.0

    num_entity = 40
    num_rels = 4
    score_model = ScoreInfer(-1, 'config', 'path', 'none')
    if model_name in ('TransE', 'TransE_l1', 'TransE_l2',
                      'DistMult', 'ComplEx'):
        model = InferModel('cpu', model_name, hidden_dim, batch_size=16)
    elif model_name == 'RESCAL':
        model = InferModel('cpu', model_name, hidden_dim)
    elif model_name == 'RotatE':
        model = InferModel('cpu',
                           model_name,
                           hidden_dim,
                           double_entity_emb=True)

    entity_emb, rel_emb = generate_rand_emb(model_name, num_entity, num_rels,
                                            hidden_dim, 'none')
    model.entity_emb = InferEmbedding('cpu')
    model.entity_emb.emb = entity_emb
    model.relation_emb = InferEmbedding('cpu')
    model.relation_emb.emb = rel_emb
    score_model.model = model
    score_func = model.score_func

    head = F.arange(0, num_entity // 2)
    rel = F.arange(0, num_rels)
    tail = F.arange(num_entity // 2, num_entity)

    # exec_mode == 'triplet_wise'
    tw_rel = np.random.randint(0, num_rels, num_entity // 2)
    tw_rel = F.tensor(tw_rel)
    result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise')
    assert len(result1) == 1
    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        hemb = F.take(entity_emb, head[i], 0)
        remb = F.take(rel_emb, tw_rel[i], 0)
        temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0)
        edge = FakeEdge(hemb, temb, remb)
        score = F.asnumpy(score_func.edge_func(edge)['score'])
        scores.append(score)
        head_ids.append(F.asnumpy(head[i]))
        rel_ids.append(F.asnumpy(tw_rel[i]))
        tail_ids.append(F.asnumpy(tail[i]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    rel_ids = rel_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_rel, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    # exec_mode == 'all'
    result1 = score_model.topK(head, rel, tail, k=20)
    result2 = score_model.topK(head=head, tail=tail, k=20)
    assert len(result1) == 1
    assert len(result2) == 1

    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:20]
    head_ids = head_ids[idx]
    rel_ids = rel_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_rel, r1_tail, r1_score = result1[0]
    r2_head, r2_rel, r2_tail, r2_score = result2[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r2_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r2_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    np.testing.assert_allclose(r2_tail, tail_ids)

    # exec_mode == 'batch_rel': one ranked result per relation.
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel')
    result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel')
    assert len(result1) == num_rels
    assert len(result2) == num_rels
    for j in range(rel.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[j]
        r2_head, r2_rel, r2_tail, r2_score = result2[j]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)

    # exec_mode == 'batch_head': score every entity as head and as tail;
    # one ranked result per head entity.
    head = F.arange(0, num_entity)
    rel = F.arange(0, num_rels)
    tail = F.arange(0, num_entity)
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_head')
    result2 = score_model.topK(exec_mode='batch_head')
    assert len(result1) == num_entity
    assert len(result2) == num_entity

    for i in range(head.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[i]
        r2_head, r2_rel, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)

    # exec_mode == 'batch_tail': one ranked result per tail entity.
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail')
    result2 = score_model.topK(exec_mode='batch_tail')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for k in range(tail.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for j in range(rel.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[k]
        r2_head, r2_rel, r2_tail, r2_score = result2[k]
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-6, atol=1e-6)
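
# Illustrative driver (assumed, not part of the test): exercise the check for
# every score function the branches above construct.
for name in ('TransE', 'TransE_l1', 'TransE_l2', 'DistMult',
             'ComplEx', 'RESCAL', 'RotatE'):
    check_topk_score(name)
    print('check_topk_score passed:', name)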
Example #20
def dist_train_test(args,
                    model,
                    train_sampler,
                    entity_pb,
                    relation_pb,
                    l2g,
                    rank=0,
                    rel_parts=None,
                    cross_rels=None,
                    barrier=None):
    if args.num_proc > 1:
        th.set_num_threads(args.num_thread)

    client = connect_to_kvstore(args, entity_pb, relation_pb, l2g)
    client.barrier()
    train_time_start = time.time()
    train(args, model, train_sampler, None, rank, rel_parts, cross_rels,
          barrier, client)
    total_train_time = time.time() - train_time_start
    client.barrier()

    # Release the memory of the local model.
    model = None

    # Only the first client process on machine 0 pulls the full model
    # from the kvstore and runs evaluation.
    if client.get_machine_id() == 0 and client.get_id() % args.num_client == 0:
        args.num_test_proc = args.num_client
        dataset_full = get_dataset(args.data_path, args.dataset,
                                   args.format, args.delimiter,
                                   args.data_files)
        args.train = False
        args.valid = False
        args.test = True
        args.strict_rel_part = False
        args.soft_rel_part = False
        args.async_update = False

        args.eval_filter = not args.no_eval_filter
        if args.neg_deg_sample_eval:
            assert not args.eval_filter, "Degree-based negative sampling cannot filter positive edges."

        print('Full data n_entities: ' + str(dataset_full.n_entities))
        print("Full data n_relations: " + str(dataset_full.n_relations))

        eval_dataset = EvalDataset(dataset_full, args)

        if args.neg_sample_size_eval < 0:
            args.neg_sample_size_eval = dataset_full.n_entities
        args.batch_size_eval = get_compatible_batch_size(
            args.batch_size_eval, args.neg_sample_size_eval)

        model_test = load_model(args, dataset_full.n_entities,
                                dataset_full.n_relations)

        print("Pull relation_emb ...")
        relation_id = F.arange(0, model_test.n_relations)
        relation_data = client.pull(name='relation_emb', id_tensor=relation_id)
        model_test.relation_emb.emb[relation_id] = relation_data

        print("Pull entity_emb ... ")
        # split model into 100 small parts
        start = 0
        percent = 0
        entity_id = F.arange(0, model_test.n_entities)
        count = int(model_test.n_entities / 100)
        end = start + count
        while True:
            print("Pull model from kvstore: %d / 100 ..." % percent)
            if end >= model_test.n_entities:
                end = -1
            tmp_id = entity_id[start:end]
            entity_data = client.pull(name='entity_emb', id_tensor=tmp_id)
            model_test.entity_emb.emb[tmp_id] = entity_data
            if end == -1:
                break
            start = end
            end += count
            percent += 1

        if not args.no_save_emb:
            print("save model to %s ..." % args.save_path)
            save_model(args, model_test)

        print('Total train time {:.3f} seconds'.format(total_train_time))

        if args.test:
            model_test.share_memory()
            start = time.time()
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                test_sampler_head = eval_dataset.create_sampler(
                    'test',
                    args.batch_size_eval,
                    args.neg_sample_size_eval,
                    args.neg_sample_size_eval,
                    args.eval_filter,
                    mode='chunk-head',
                    num_workers=args.num_workers,
                    rank=i,
                    ranks=args.num_test_proc)
                test_sampler_tail = eval_dataset.create_sampler(
                    'test',
                    args.batch_size_eval,
                    args.neg_sample_size_eval,
                    args.neg_sample_size_eval,
                    args.eval_filter,
                    mode='chunk-tail',
                    num_workers=args.num_workers,
                    rank=i,
                    ranks=args.num_test_proc)
                test_sampler_heads.append(test_sampler_head)
                test_sampler_tails.append(test_sampler_tail)

            eval_dataset = None
            dataset_full = None

            print("Run test, test processes: %d" % args.num_test_proc)

            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp,
                                  args=(args, model_test, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ], i, 'Test', queue))
                procs.append(proc)
                proc.start()

            metrics = {}
            logs = []
            for i in range(args.num_test_proc):
                log = queue.get()
                logs = logs + log

            for metric in logs[0].keys():
                metrics[metric] = sum([log[metric]
                                       for log in logs]) / len(logs)

            print("-------------- Test result --------------")
            for k, v in metrics.items():
                print('Test average {} : {}'.format(k, v))
            print("-----------------------------------------")

            for proc in procs:
                proc.join()

            print('testing takes {:.3f} seconds'.format(time.time() - start))

        client.shut_down()  # shut down kvserver
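
# A standalone sketch (assumed helper, not original code) of the chunked-pull
# pattern used above: cover [0, n) in roughly `parts` ranges without dropping
# the tail.
def chunk_ranges(n, parts=100):
    step = max(1, n // parts)
    start = 0
    while start < n:
        end = min(start + step, n)
        yield start, end
        start = end

# e.g. for start, end in chunk_ranges(model_test.n_entities):
#          client.pull(name='entity_emb', id_tensor=entity_id[start:end])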
Example #21
def part_edge(self, rank, world_size, mode):
    """Return the edge IDs assigned to `rank`, or all edges if unpartitioned."""
    edges = self.edge_parts[rank]
    if edges is None:
        edges = F.arange(0, self.g.number_of_edges())
    return edges
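
# Minimal sketch (assumed names, not original code) of how a per-rank split
# like edge_parts is typically produced: shuffle the edge IDs once, then cut
# the permutation into world_size parts.
import numpy as np

def make_edge_parts(num_edges, world_size, seed=0):
    perm = np.random.default_rng(seed).permutation(num_edges)
    return np.array_split(perm, world_size)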
Example #22
def check_topk_score(model_name):
    hidden_dim = 32
    gamma = 12.0

    num_entity = 40
    num_rels = 4
    entity_emb, rel_emb = generate_rand_emb(model_name, num_entity, num_rels, hidden_dim, 'none')
    score_model, score_func = create_score_infer(model_name, entity_emb, rel_emb)

    head = F.arange(0, num_entity // 2)
    rel = F.arange(0, num_rels)
    tail = F.arange(num_entity // 2, num_entity)

    # exec_mode == 'triplet_wise'
    tw_rel = np.random.randint(0, num_rels, num_entity // 2)
    tw_rel = F.tensor(tw_rel)
    result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise')
    assert len(result1) == 1
    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        hemb = F.take(entity_emb, head[i], 0)
        remb = F.take(rel_emb, tw_rel[i], 0)
        temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0)
        edge = FakeEdge(hemb, temb, remb)
        score = F.asnumpy(score_func.edge_func(edge)['score'])
        scores.append(score)
        head_ids.append(F.asnumpy(head[i]))
        rel_ids.append(F.asnumpy(tw_rel[i]))
        tail_ids.append(F.asnumpy(tail[i]))
    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:10]
    head_ids = head_ids[idx]
    rel_ids = rel_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_rel, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    # exec_mode == 'all'
    result1 = score_model.topK(head, rel, tail, k=20)
    result2 = score_model.topK(head=head, tail=tail, k=20)
    assert len(result1) == 1
    assert len(result2) == 1

    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i in range(head.shape[0]):
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

    scores = np.asarray(scores)
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)
    idx = np.argsort(scores)
    idx = idx[::-1]
    idx = idx[:20]
    head_ids = head_ids[idx]
    rel_ids = rel_ids[idx]
    tail_ids = tail_ids[idx]
    score_topk = scores[idx]

    r1_head, r1_rel, r1_tail, r1_score = result1[0]
    r2_head, r2_rel, r2_tail, r2_score = result2[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r2_head, head_ids)
    np.testing.assert_allclose(r1_rel, rel_ids)
    np.testing.assert_allclose(r2_rel, rel_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    np.testing.assert_allclose(r2_tail, tail_ids)

    result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel')
    result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel')
    assert len(result1) == num_rels
    assert len(result2) == num_rels
    for j in range(rel.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[j]
        r2_head, r2_rel, r2_tail, r2_score = result2[j]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)

    head = F.arange(0, num_entity)
    rel = F.arange(0, num_rels)
    tail = F.arange(0, num_entity)
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_head')
    result2 = score_model.topK(exec_mode='batch_head')
    assert len(result1) == num_entity
    assert len(result2) == num_entity

    for i in range(head.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for j in range(rel.shape[0]):
            for k in range(tail.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[i]
        r2_head, r2_rel, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)

    result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail')
    result2 = score_model.topK(exec_mode='batch_tail')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for k in range(tail.shape[0]):
        scores = []
        head_ids = []
        rel_ids = []
        tail_ids = []
        for i in range(head.shape[0]):
            for j in range(rel.shape[0]):
                hemb = F.take(entity_emb, head[i], 0)
                remb = F.take(rel_emb, rel[j], 0)
                temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                score = F.asnumpy(score_func.edge_func(edge)['score'])
                scores.append(score)
                head_ids.append(F.asnumpy(head[i]))
                rel_ids.append(F.asnumpy(rel[j]))
                tail_ids.append(F.asnumpy(tail[k]))

        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)
        idx = idx[::-1]
        idx = idx[:10]
        head_ids = head_ids[idx]
        rel_ids = rel_ids[idx]
        tail_ids = tail_ids[idx]
        score_topk = scores[idx]

        r1_head, r1_rel, r1_tail, r1_score = result1[k]
        r2_head, r2_rel, r2_tail, r2_score = result2[k]
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r1_rel, rel_ids)
        np.testing.assert_allclose(r2_rel, rel_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-6, atol=1e-6)
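
# Illustrative sketch (assumed helper): every reference loop above scores one
# (head, rel, tail) triple the same way, so a helper keeps each mode's loop
# short.
def brute_force_score(entity_emb, rel_emb, score_func, h, r, t):
    hemb = F.take(entity_emb, h, 0)
    remb = F.take(rel_emb, r, 0)
    temb = F.unsqueeze(F.take(entity_emb, t, 0), dim=0)
    edge = FakeEdge(hemb, temb, remb)
    return F.asnumpy(score_func.edge_func(edge)['score'])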