def create_graph(num_part, dist_graph_path, hetero):
    """Create a random graph with integer ``'feat'`` data and partition it.

    Parameters
    ----------
    num_part
        Number of partitions, passed through to ``partition_graph``.
    dist_graph_path
        Output location, passed through to ``partition_graph``.
    hetero
        When falsy, a homogeneous random graph with 10000 nodes and 42000
        edges is built. Otherwise a heterograph with node types
        ``n1``/``n2``/``n3`` and relations ``r1``/``r2``/``r3`` is built.

    Notes
    -----
    The graph name handed to ``partition_graph`` is ``graph_name``, which is
    defined outside this function.
    """
    if hetero:
        from scipy import sparse as spsp

        node_counts = {'n1': 10000, 'n2': 10010, 'n3': 10020}
        relations = [('n1', 'r1', 'n2'),
                     ('n1', 'r2', 'n3'),
                     ('n2', 'r3', 'n3')]
        edge_dict = {}
        for relation in relations:
            src_type, dst_type = relation[0], relation[2]
            # Fixed random_state keeps the sparse edge pattern reproducible.
            coo = spsp.random(node_counts[src_type], node_counts[dst_type],
                              density=0.001, format='coo', random_state=100)
            edge_dict[relation] = (coo.row, coo.col)
        g = dgl.heterograph(edge_dict, node_counts)
        # Only node type 'n1' and relation 'r1' receive features.
        n1_ids = F.arange(0, g.number_of_nodes('n1'))
        g.nodes['n1'].data['feat'] = F.unsqueeze(n1_ids, 1)
        r1_ids = F.arange(0, g.number_of_edges('r1'))
        g.edges['r1'].data['feat'] = F.unsqueeze(r1_ids, 1)
    else:
        g = dgl.rand_graph(10000, 42000)
        node_ids = F.arange(0, g.number_of_nodes())
        g.ndata['feat'] = F.unsqueeze(node_ids, 1)
        edge_ids = F.arange(0, g.number_of_edges())
        g.edata['feat'] = F.unsqueeze(edge_ids, 1)

    partition_graph(g, graph_name, num_part, dist_graph_path)
def knn_graphE(x, k, istrain=False):
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix. The predecessors of each point are its k-nearest
    neighbors.

    If a 3D tensor is given instead, then each row would be transformed into
    a separate graph.  The graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.

        If 2D, each row of ``x`` corresponds to a node.

        If 3D, a k-NN graph would be constructed for each row.  Then
        the graphs are unioned.
    k : int
        The number of neighbors
    istrain : bool, optional
        When true, with probability one half the neighbors are not the exact
        ``k`` nearest ones: the nearest candidate is always kept and the
        other ``k - 1`` are drawn at random from a pool of roughly ``1.5 * k``
        nearest candidates (capped at the number of points).

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    if istrain and np.random.rand() > 0.5:
        # The candidate pool must not exceed the number of points, otherwise
        # the top-k query fails even though ``k`` itself is valid.  It is
        # never shrunk below ``k`` so an oversized ``k`` still errors out
        # exactly as in the deterministic branch.
        pool_size = max(k, min(round(1.5 * k), n_points))
        k_indices = F.argtopk(dist, pool_size, 2, descending=False)
        # Pick k - 1 random candidates from positions 1..pool_size-1 ...
        rand_k = np.random.permutation(pool_size - 1)[0:k - 1] + 1
        # ... and always keep position 0, the closest candidate
        # (presumably the point itself, at distance zero).
        rand_k = np.append(rand_k, 0)
        k_indices = k_indices[:, :, rand_k]
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)

    dst = F.copy_to(k_indices, F.cpu())
    # Broadcast each point's own index across its neighbor slots.
    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    # Shift node ids so that every sample occupies its own id range.
    per_sample_offset = F.reshape(
        F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))

    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g
def check_infer_score(func_name):
    """Check a score function's ``infer`` against edge-by-edge scoring.

    For each broadcast setting (``'none'``, ``'head'``, ``'rel'``, ``'tail'``)
    random embeddings are drawn with ``generate_rand_emb``, the score function
    registered under ``func_name`` in ``ke_infer_funcs`` is instantiated, and
    the output of ``infer`` is compared with scores computed one
    (head, relation, tail) combination at a time through ``edge_func``.

    Parameters
    ----------
    func_name
        Key into ``ke_infer_funcs``; also forwarded to ``generate_rand_emb``.

    Raises
    ------
    AssertionError
        If the batched and the edge-by-edge scores disagree.
    """
    ke_score_func = ke_infer_funcs[func_name]

    def build_score_func(args):
        """Instantiate the score function from the generated constructor args."""
        if args is None:
            return ke_score_func()
        if isinstance(args, tuple):
            return ke_score_func(*args)
        return ke_score_func(args)

    def edgewise_scores(score_func, head_emb, rel_emb, tail_emb):
        """Score every (head, rel, tail) combination through ``edge_func``."""
        h_score = []
        for i in range(head_emb.shape[0]):
            r_score = []
            for j in range(rel_emb.shape[0]):
                t_score = []
                for k in range(tail_emb.shape[0]):
                    hemb = head_emb[i]
                    remb = rel_emb[j]
                    temb = F.unsqueeze(tail_emb[k], dim=0)
                    edge = FakeEdge(hemb, temb, remb)
                    score = score_func.edge_func(edge)['score']
                    t_score.append(F.asnumpy(score))
                r_score.append(t_score)
            h_score.append(r_score)
        return np.asarray(h_score)

    # (broadcast mode, expected result shape).  The broadcast axis is pinned
    # to 1, so the reshape itself fails if the generated embeddings do not
    # have a single entry along that axis.
    cases = (
        ('none', lambda n_head, n_rel, n_tail: (n_head, n_rel, n_tail)),
        ('head', lambda n_head, n_rel, n_tail: (1, n_rel, n_tail)),
        ('rel', lambda n_head, n_rel, n_tail: (n_head, 1, n_tail)),
        ('tail', lambda n_head, n_rel, n_tail: (n_head, n_rel, 1)),
    )
    for bcast, expected_shape in cases:
        head_emb, rel_emb, tail_emb, args = generate_rand_emb(func_name, bcast)
        score_func = build_score_func(args)

        score1 = score_func.infer(head_emb, rel_emb, tail_emb)
        assert score1.shape[0] == head_emb.shape[0]

        score2 = edgewise_scores(score_func, head_emb, rel_emb, tail_emb)
        score2 = score2.reshape(expected_shape(head_emb.shape[0],
                                               rel_emb.shape[0],
                                               tail_emb.shape[0]))
        np.testing.assert_allclose(F.asnumpy(score1), score2,
                                   rtol=1e-5, atol=1e-5)
def topK(self, head=None, rel=None, tail=None, exec_mode='all', k=10):
    """Rank candidate triplets by score and return the best ``k``.

    Parameters
    ----------
    head, rel, tail : optional
        Candidate head entity ids, relation ids and tail entity ids.  When
        one is ``None`` it defaults to all ids, i.e.
        ``0..self.model.num_entity - 1`` for entities and
        ``0..self.model.num_rel - 1`` for relations.
    exec_mode : str
        * ``'triplet_wise'``: ``head[i]``, ``rel[i]``, ``tail[i]`` form the
          candidates; all three must have the same length.
        * ``'all'``: every (head, rel, tail) combination is a candidate and
          one global ranking is produced.
        * ``'batch_head'`` / ``'batch_rel'`` / ``'batch_tail'``: one ranking
          per head / relation / tail over all combinations of the other two.
    k : int
        Maximum number of results per ranking.

    Returns
    -------
    list of tuple
        ``(head_ids, rel_ids, tail_ids, scores)`` tuples of numpy arrays,
        sorted by descending score.  ``'triplet_wise'`` and ``'all'`` return
        a single tuple; the batch modes return one tuple per batched element.

    Raises
    ------
    AssertionError
        If the lists differ in length in ``'triplet_wise'`` mode, or if
        ``exec_mode`` is not one of the supported values.
    """
    head = F.arange(0, self.model.num_entity) if head is None else F.tensor(head)
    rel = F.arange(0, self.model.num_rel) if rel is None else F.tensor(rel)
    tail = F.arange(0, self.model.num_entity) if tail is None else F.tensor(tail)

    num_head = F.shape(head)[0]
    num_rel = F.shape(rel)[0]
    num_tail = F.shape(tail)[0]

    def select_topk(raw_score, num_candidates):
        """Return (scores, flat candidate indices) of the best ``k`` entries."""
        score = self.score_func(raw_score)
        idx = F.arange(0, num_candidates)
        sidx = F.argsort(score, dim=0, descending=True)
        sidx = sidx[:k]
        return score[sidx], idx[sidx]

    # NOTE(review): the index decoding below assumes the model returns scores
    # flattened with tail varying fastest, then relation, then head.
    result = []
    if exec_mode == 'triplet_wise':
        # Raised explicitly so the check survives ``python -O``.
        if not (num_head == num_rel == num_tail):
            raise AssertionError(
                'For triplet wise exection mode, head, relation and tail lists should have same length')

        raw_score = self.model.score(head, rel, tail, triplet_wise=True)
        score, idx = select_topk(raw_score, num_head)

        result.append((F.asnumpy(head[idx]),
                       F.asnumpy(rel[idx]),
                       F.asnumpy(tail[idx]),
                       F.asnumpy(score)))
    elif exec_mode == 'all':
        raw_score = self.model.score(head, rel, tail)
        score, idx = select_topk(raw_score, num_head * num_rel * num_tail)

        tail_idx = idx % num_tail
        idx = floor_divide(idx, num_tail)
        rel_idx = idx % num_rel
        idx = floor_divide(idx, num_rel)
        head_idx = idx % num_head

        result.append((F.asnumpy(head[head_idx]),
                       F.asnumpy(rel[rel_idx]),
                       F.asnumpy(tail[tail_idx]),
                       F.asnumpy(score)))
    elif exec_mode == 'batch_head':
        for i in range(num_head):
            raw_score = self.model.score(F.unsqueeze(head[i], 0), rel, tail)
            score, idx = select_topk(raw_score, num_rel * num_tail)

            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            rel_idx = idx % num_rel

            # Size the constant column from the actual number of results:
            # fewer than k candidates may exist.
            result.append((np.full((F.shape(score)[0],), F.asnumpy(head[i])),
                           F.asnumpy(rel[rel_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
    elif exec_mode == 'batch_rel':
        for i in range(num_rel):
            raw_score = self.model.score(head, F.unsqueeze(rel[i], 0), tail)
            score, idx = select_topk(raw_score, num_head * num_tail)

            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           np.full((F.shape(score)[0],), F.asnumpy(rel[i])),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
    elif exec_mode == 'batch_tail':
        for i in range(num_tail):
            raw_score = self.model.score(head, rel, F.unsqueeze(tail[i], 0))
            score, idx = select_topk(raw_score, num_head * num_rel)

            rel_idx = idx % num_rel
            idx = floor_divide(idx, num_rel)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(rel[rel_idx]),
                           np.full((F.shape(score)[0],), F.asnumpy(tail[i])),
                           F.asnumpy(score)))
    else:
        # Explicit raise instead of ``assert False`` so an unknown mode is
        # still rejected when assertions are disabled.
        raise AssertionError('unknow execution mode type {}'.format(exec_mode))

    return result
def _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode):
    """Check ``score_model.link_predict`` against brute-force scoring.

    Random embeddings are written to a temporary directory, loaded into
    ``score_model`` and the graph ``g`` is attached.  ``link_predict`` is then
    run in the ``triplet_wise``, ``all``, ``batch_rel``, ``batch_head`` and
    ``batch_tail`` execution modes, and each result is compared with a
    reference obtained by scoring every candidate triplet through
    ``edge_func`` and sorting.

    Parameters
    ----------
    score_model
        Model under test; must provide ``model_name``, ``load``,
        ``attach_graph``, ``_score_func`` and ``link_predict``.
    g
        Graph attached to the model.  The reference treats ``(h, t)`` as an
        existing edge when ``t == (h + 1) % num_entity`` or
        ``t == (h - 1) % num_entity``, so ``g`` is presumably a ring over the
        entities -- confirm against the caller.
    num_entity, num_rels
        NOTE: both arguments are overridden below with 40 and 4.
    exclude_mode
        ``None`` or ``'mask'`` keep existing edges in the ranking (``'mask'``
        additionally expects a mask flagging them); any other value makes the
        reference skip existing edges.

    Raises
    ------
    AssertionError
        If any prediction differs from the brute-force reference.
    """
    hidden_dim = 32
    num_entity = 40
    num_rels = 4

    # The directory is kept alive for the whole check; this is harmless and
    # stays safe should the model read the embedding files lazily.
    with tempfile.TemporaryDirectory() as tmpdirname:
        entity_emb, rel_emb = generate_rand_emb(
            score_model.model_name, num_entity, num_rels, hidden_dim, 'none')
        create_emb_file(Path(tmpdirname), 'entity.npy', entity_emb.numpy())
        create_emb_file(Path(tmpdirname), 'relation.npy', rel_emb.numpy())

        score_model.load(Path(tmpdirname))
        score_model.attach_graph(g)
        score_func = score_model._score_func

        def is_known_edge(h, t):
            """Whether (h, t) counts as an existing edge of the graph."""
            return (h + 1) % num_entity == t or (h - 1) % num_entity == t

        def expected_topk(triplets, topk):
            """Brute-force reference: (heads, rels, tails, scores, mask).

            ``triplets`` yields (head, rel, tail) id scalars; the enumeration
            order is kept so that ties sort as they always did.
            """
            scores, head_ids, rel_ids, tail_ids = [], [], [], []
            for h, r, t in triplets:
                hemb = F.take(entity_emb, h, 0)
                remb = F.take(rel_emb, r, 0)
                temb = F.unsqueeze(F.take(entity_emb, t, 0), dim=0)
                edge = FakeEdge(hemb, temb, remb)
                scores.append(F.asnumpy(score_func.edge_func(edge)['score']))
                head_ids.append(F.asnumpy(h))
                rel_ids.append(F.asnumpy(r))
                tail_ids.append(F.asnumpy(t))
            scores = np.asarray(scores)
            scores = scores.reshape(scores.shape[0])
            head_ids = np.asarray(head_ids)
            rel_ids = np.asarray(rel_ids)
            tail_ids = np.asarray(tail_ids)
            order = np.argsort(scores)[::-1]  # descending score

            if exclude_mode is None or exclude_mode == 'mask':
                order = order[:topk]
                head_ids = head_ids[order]
                rel_ids = rel_ids[order]
                tail_ids = tail_ids[order]
                score_topk = scores[order]
                mask = None
                if exclude_mode == 'mask':
                    mask = np.full((topk,), False)
                    for pos in range(topk):
                        if is_known_edge(head_ids[pos], tail_ids[pos]):
                            mask[pos] = True
                return head_ids, rel_ids, tail_ids, score_topk, mask

            # Exclusion: walk down the ranking, skipping existing edges,
            # until ``topk`` candidates have been collected.
            kept = []
            cursor = 0
            while len(kept) < topk:
                cand = order[cursor]
                cursor += 1
                if is_known_edge(head_ids[cand], tail_ids[cand]):
                    continue
                kept.append(cand)
            return (F.tensor([head_ids[c] for c in kept]),
                    F.tensor([rel_ids[c] for c in kept]),
                    F.tensor([tail_ids[c] for c in kept]),
                    F.tensor([scores[c] for c in kept]),
                    None)

        def assert_matches(result, expected):
            """Compare one ``link_predict`` result tuple with the reference."""
            exp_head, exp_rel, exp_tail, exp_score, exp_mask = expected
            res_head, res_rel, res_tail, res_score, res_mask = result
            np.testing.assert_allclose(res_head, exp_head)
            np.testing.assert_allclose(res_rel, exp_rel)
            np.testing.assert_allclose(res_tail, exp_tail)
            np.testing.assert_allclose(res_score, exp_score,
                                       rtol=1e-5, atol=1e-5)
            if exclude_mode == 'mask':
                np.testing.assert_allclose(res_mask, exp_mask)
            else:
                assert res_mask is None

        head = F.arange(0, num_entity // 2)
        rel = F.arange(0, num_rels)
        tail = F.arange(num_entity // 2, num_entity)

        # exec_mode == 'triplet_wise'
        tw_rel = np.random.randint(0, num_rels, num_entity // 2)
        tw_rel = F.tensor(tw_rel)
        result1 = score_model.link_predict(head, tw_rel, tail,
                                           exec_mode='triplet_wise',
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        assert len(result1) == 1
        expected = expected_topk(
            ((head[i], tw_rel[i], tail[i]) for i in range(head.shape[0])),
            10)
        assert_matches(result1[0], expected)

        # exec_mode == 'all'
        result1 = score_model.link_predict(head, rel, tail, topk=20,
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        result2 = score_model.link_predict(head=head, tail=tail, topk=20,
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        assert len(result1) == 1
        assert len(result2) == 1
        expected = expected_topk(
            ((head[i], rel[j], tail[k])
             for i in range(head.shape[0])
             for j in range(rel.shape[0])
             for k in range(tail.shape[0])),
            20)
        assert_matches(result1[0], expected)
        assert_matches(result2[0], expected)

        # exec_mode == 'batch_rel'
        result1 = score_model.link_predict(head, rel, tail,
                                           exec_mode='batch_rel',
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        result2 = score_model.link_predict(head=head, tail=tail,
                                           exec_mode='batch_rel',
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        assert len(result1) == num_rels
        assert len(result2) == num_rels
        for j in range(rel.shape[0]):
            expected = expected_topk(
                ((head[i], rel[j], tail[k])
                 for i in range(head.shape[0])
                 for k in range(tail.shape[0])),
                10)
            assert_matches(result1[j], expected)
            assert_matches(result2[j], expected)

        # The remaining modes rank over every entity as head and as tail.
        head = F.arange(0, num_entity)
        rel = F.arange(0, num_rels)
        tail = F.arange(0, num_entity)

        # exec_mode == 'batch_head'
        result1 = score_model.link_predict(head, rel, tail,
                                           exec_mode='batch_head',
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        result2 = score_model.link_predict(exec_mode='batch_head',
                                           exclude_mode=exclude_mode,
                                           batch_size=16)
        assert len(result1) == num_entity
        assert len(result2) == num_entity
        for i in range(head.shape[0]):
            expected = expected_topk(
                ((head[i], rel[j], tail[k])
                 for j in range(rel.shape[0])
                 for k in range(tail.shape[0])),
                10)
            assert_matches(result1[i], expected)
            assert_matches(result2[i], expected)

        # exec_mode == 'batch_tail' (runs with the default batch size)
        result1 = score_model.link_predict(head, rel, tail,
                                           exec_mode='batch_tail',
                                           exclude_mode=exclude_mode)
        result2 = score_model.link_predict(exec_mode='batch_tail',
                                           exclude_mode=exclude_mode)
        assert len(result1) == num_entity
        assert len(result2) == num_entity
        for k in range(tail.shape[0]):
            expected = expected_topk(
                ((head[i], rel[j], tail[k])
                 for i in range(head.shape[0])
                 for j in range(rel.shape[0])),
                10)
            assert_matches(result1[k], expected)
            assert_matches(result2[k], expected)
def check_topk_score(model_name):
    """Check ``score_model.topK`` against brute-force scoring.

    Random embeddings for 40 entities and 4 relations are generated and a
    score model is built with ``create_score_infer``.  ``topK`` is then run in
    the ``triplet_wise``, ``all``, ``batch_rel``, ``batch_head`` and
    ``batch_tail`` execution modes, and each result is compared with a
    reference obtained by scoring every candidate triplet through
    ``edge_func`` and sorting by descending score.

    Parameters
    ----------
    model_name
        Forwarded to ``generate_rand_emb`` and ``create_score_infer``.

    Raises
    ------
    AssertionError
        If any ``topK`` result differs from the brute-force reference.
    """
    hidden_dim = 32
    num_entity = 40
    num_rels = 4
    entity_emb, rel_emb = generate_rand_emb(
        model_name, num_entity, num_rels, hidden_dim, 'none')
    score_model, score_func = create_score_infer(
        model_name, entity_emb, rel_emb)

    def expected_topk(triplets, topk):
        """Brute-force reference: (heads, rels, tails, scores) of the top-k.

        ``triplets`` yields (head, rel, tail) id scalars; the enumeration
        order is kept so that ties sort as they always did.
        """
        scores, head_ids, rel_ids, tail_ids = [], [], [], []
        for h, r, t in triplets:
            hemb = F.take(entity_emb, h, 0)
            remb = F.take(rel_emb, r, 0)
            temb = F.unsqueeze(F.take(entity_emb, t, 0), dim=0)
            edge = FakeEdge(hemb, temb, remb)
            scores.append(F.asnumpy(score_func.edge_func(edge)['score']))
            head_ids.append(F.asnumpy(h))
            rel_ids.append(F.asnumpy(r))
            tail_ids.append(F.asnumpy(t))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        head_ids = np.asarray(head_ids)
        rel_ids = np.asarray(rel_ids)
        tail_ids = np.asarray(tail_ids)
        order = np.argsort(scores)[::-1][:topk]  # best scores first
        return head_ids[order], rel_ids[order], tail_ids[order], scores[order]

    def assert_matches(result, expected, tol=1e-5):
        """Compare one ``topK`` result tuple with the reference."""
        exp_head, exp_rel, exp_tail, exp_score = expected
        res_head, res_rel, res_tail, res_score = result
        np.testing.assert_allclose(res_score, exp_score, rtol=tol, atol=tol)
        np.testing.assert_allclose(res_head, exp_head)
        np.testing.assert_allclose(res_rel, exp_rel)
        np.testing.assert_allclose(res_tail, exp_tail)

    head = F.arange(0, num_entity // 2)
    rel = F.arange(0, num_rels)
    tail = F.arange(num_entity // 2, num_entity)

    # exec_mode == 'triplet_wise'
    tw_rel = np.random.randint(0, num_rels, num_entity // 2)
    tw_rel = F.tensor(tw_rel)
    result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise')
    assert len(result1) == 1
    expected = expected_topk(
        ((head[i], tw_rel[i], tail[i]) for i in range(head.shape[0])),
        10)
    assert_matches(result1[0], expected)

    # exec_mode == 'all'
    result1 = score_model.topK(head, rel, tail, k=20)
    result2 = score_model.topK(head=head, tail=tail, k=20)
    assert len(result1) == 1
    assert len(result2) == 1
    expected = expected_topk(
        ((head[i], rel[j], tail[k])
         for i in range(head.shape[0])
         for j in range(rel.shape[0])
         for k in range(tail.shape[0])),
        20)
    assert_matches(result1[0], expected)
    assert_matches(result2[0], expected)

    # exec_mode == 'batch_rel'
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel')
    result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel')
    assert len(result1) == num_rels
    assert len(result2) == num_rels
    for j in range(rel.shape[0]):
        expected = expected_topk(
            ((head[i], rel[j], tail[k])
             for i in range(head.shape[0])
             for k in range(tail.shape[0])),
            10)
        assert_matches(result1[j], expected)
        assert_matches(result2[j], expected)

    # The remaining modes rank over every entity as head and as tail.
    head = F.arange(0, num_entity)
    rel = F.arange(0, num_rels)
    tail = F.arange(0, num_entity)

    # exec_mode == 'batch_head'
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_head')
    result2 = score_model.topK(exec_mode='batch_head')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for i in range(head.shape[0]):
        expected = expected_topk(
            ((head[i], rel[j], tail[k])
             for j in range(rel.shape[0])
             for k in range(tail.shape[0])),
            10)
        assert_matches(result1[i], expected)
        assert_matches(result2[i], expected)

    # exec_mode == 'batch_tail' (scores are compared with a tighter tolerance)
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail')
    result2 = score_model.topK(exec_mode='batch_tail')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for k in range(tail.shape[0]):
        expected = expected_topk(
            ((head[i], rel[j], tail[k])
             for i in range(head.shape[0])
             for j in range(rel.shape[0])),
            10)
        assert_matches(result1[k], expected, tol=1e-6)
        assert_matches(result2[k], expected, tol=1e-6)
def _brute_force_topk(entity_emb, rel_emb, score_func, head, rel, tail,
                      index_triples, topk):
    """Score candidate triplets one by one and keep the highest-scoring ones.

    Parameters
    ----------
    entity_emb : Tensor
        Entity embedding table; rows are selected with ``F.take(..., 0)``.
    rel_emb : Tensor
        Relation embedding table; rows are selected with ``F.take(..., 0)``.
    score_func : object
        Scoring function exposing ``edge_func(edge)['score']``.
    head, rel, tail : Tensor
        Candidate head / relation / tail IDs.
    index_triples : iterable of (int, int, int)
        Positions ``(i, j, k)`` into ``head``, ``rel`` and ``tail``. The
        iteration order defines the candidate order fed to ``np.argsort``.
    topk : int
        Number of best candidates to keep.

    Returns
    -------
    tuple of numpy.ndarray
        ``(head_ids, rel_ids, tail_ids, scores)`` of the ``topk`` candidates,
        ordered by descending score.
    """
    scores = []
    head_ids = []
    rel_ids = []
    tail_ids = []
    for i, j, k in index_triples:
        h_id, r_id, t_id = head[i], rel[j], tail[k]
        hemb = F.take(entity_emb, h_id, 0)
        remb = F.take(rel_emb, r_id, 0)
        temb = F.unsqueeze(F.take(entity_emb, t_id, 0), dim=0)
        edge = FakeEdge(hemb, temb, remb)
        scores.append(F.asnumpy(score_func.edge_func(edge)['score']))
        head_ids.append(F.asnumpy(h_id))
        rel_ids.append(F.asnumpy(r_id))
        tail_ids.append(F.asnumpy(t_id))

    scores = np.asarray(scores)
    # Flatten to one value per candidate.
    # NOTE(review): assumes each edge score holds a single element - confirm.
    scores = scores.reshape(scores.shape[0])
    head_ids = np.asarray(head_ids)
    rel_ids = np.asarray(rel_ids)
    tail_ids = np.asarray(tail_ids)

    # Ascending argsort, reversed, then truncated: best scores first.
    idx = np.argsort(scores)[::-1][:topk]
    return head_ids[idx], rel_ids[idx], tail_ids[idx], scores[idx]


def _assert_topk_matches(result, expected, tol):
    """Assert that one ``topK`` result equals the brute-force reference.

    Parameters
    ----------
    result : tuple
        ``(head, rel, tail, score)`` entry returned by ``ScoreInfer.topK``.
    expected : tuple of numpy.ndarray
        ``(head_ids, rel_ids, tail_ids, scores)`` from ``_brute_force_topk``.
    tol : float
        Value used for both ``rtol`` and ``atol`` when comparing scores.
    """
    r_head, r_rel, r_tail, r_score = result
    head_ids, rel_ids, tail_ids, score_topk = expected
    np.testing.assert_allclose(r_score, score_topk, rtol=tol, atol=tol)
    np.testing.assert_allclose(r_head, head_ids)
    np.testing.assert_allclose(r_rel, rel_ids)
    np.testing.assert_allclose(r_tail, tail_ids)


def check_topk_score(model_name):
    """Check ``ScoreInfer.topK`` against a brute-force reference.

    Builds an ``InferModel`` with random embeddings and verifies the
    ``triplet_wise``, default (all combinations), ``batch_rel``,
    ``batch_head`` and ``batch_tail`` execution modes. Where the original
    test did so, the call with explicit ID tensors and the call relying on
    omitted arguments are both compared with the same reference.

    Parameters
    ----------
    model_name : str
        One of ``'TransE'``, ``'TransE_l1'``, ``'TransE_l2'``,
        ``'DistMult'``, ``'ComplEx'``, ``'RESCAL'`` or ``'RotatE'``.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    AssertionError
        If any ``topK`` output differs from the reference.
    """
    hidden_dim = 32
    num_entity = 40
    num_rels = 4
    # The reference keeps 10 entries whenever ``k`` is not passed to topK.
    # NOTE(review): presumably topK's default ``k`` is 10 - confirm.
    default_k = 10

    score_model = ScoreInfer(-1, 'config', 'path', 'none')
    if model_name in ('TransE', 'TransE_l1', 'TransE_l2',
                      'DistMult', 'ComplEx'):
        model = InferModel('cpu', model_name, hidden_dim, batch_size=16)
    elif model_name == 'RESCAL':
        model = InferModel('cpu', model_name, hidden_dim)
    elif model_name == 'RotatE':
        model = InferModel('cpu', model_name, hidden_dim,
                           double_entity_emb=True)
    else:
        # Fail early instead of hitting an unbound ``model`` further down.
        raise ValueError('Unsupported model_name: {}'.format(model_name))

    entity_emb, rel_emb = generate_rand_emb(
        model_name, num_entity, num_rels, hidden_dim, 'none')
    model.entity_emb = InferEmbedding('cpu')
    model.entity_emb.emb = entity_emb
    model.relation_emb = InferEmbedding('cpu')
    model.relation_emb.emb = rel_emb
    score_model.model = model
    score_func = model.score_func

    # First half of the entities act as heads, second half as tails.
    head = F.arange(0, num_entity // 2)
    rel = F.arange(0, num_rels)
    tail = F.arange(num_entity // 2, num_entity)

    # exec_mode==triplet_wise: the i-th head, relation and tail form one
    # triplet, so a random relation is drawn for every head/tail pair.
    tw_rel = np.random.randint(0, num_rels, num_entity // 2)
    tw_rel = F.tensor(tw_rel)
    result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise')
    assert len(result1) == 1
    expected = _brute_force_topk(
        entity_emb, rel_emb, score_func, head, tw_rel, tail,
        [(i, i, i) for i in range(head.shape[0])], default_k)
    _assert_topk_matches(result1[0], expected, 1e-5)

    # exec_mode==all: every (head, rel, tail) combination, top 20 overall.
    result1 = score_model.topK(head, rel, tail, k=20)
    result2 = score_model.topK(head=head, tail=tail, k=20)
    assert len(result1) == 1
    assert len(result2) == 1
    expected = _brute_force_topk(
        entity_emb, rel_emb, score_func, head, rel, tail,
        [(i, j, k)
         for i in range(head.shape[0])
         for j in range(rel.shape[0])
         for k in range(tail.shape[0])],
        20)
    _assert_topk_matches(result1[0], expected, 1e-5)
    _assert_topk_matches(result2[0], expected, 1e-5)

    # exec_mode==batch_rel: one result per relation.
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel')
    result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel')
    assert len(result1) == num_rels
    assert len(result2) == num_rels
    for j in range(rel.shape[0]):
        expected = _brute_force_topk(
            entity_emb, rel_emb, score_func, head, rel, tail,
            [(i, j, k)
             for i in range(head.shape[0])
             for k in range(tail.shape[0])],
            default_k)
        _assert_topk_matches(result1[j], expected, 1e-5)
        _assert_topk_matches(result2[j], expected, 1e-5)

    # The remaining modes use the full entity range on both sides, which is
    # also what the argument-free topK calls below are compared against.
    head = F.arange(0, num_entity)
    rel = F.arange(0, num_rels)
    tail = F.arange(0, num_entity)

    # exec_mode==batch_head: one result per head entity.
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_head')
    result2 = score_model.topK(exec_mode='batch_head')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for i in range(head.shape[0]):
        expected = _brute_force_topk(
            entity_emb, rel_emb, score_func, head, rel, tail,
            [(i, j, k)
             for j in range(rel.shape[0])
             for k in range(tail.shape[0])],
            default_k)
        _assert_topk_matches(result1[i], expected, 1e-5)
        _assert_topk_matches(result2[i], expected, 1e-5)

    # exec_mode==batch_tail: one result per tail entity. The score tolerance
    # is tighter here (1e-6), as in the original test.
    result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail')
    result2 = score_model.topK(exec_mode='batch_tail')
    assert len(result1) == num_entity
    assert len(result2) == num_entity
    for k in range(tail.shape[0]):
        expected = _brute_force_topk(
            entity_emb, rel_emb, score_func, head, rel, tail,
            [(i, j, k)
             for i in range(head.shape[0])
             for j in range(rel.shape[0])],
            default_k)
        _assert_topk_matches(result1[k], expected, 1e-6)
        _assert_topk_matches(result2[k], expected, 1e-6)