def generate_rand_graph(n, func_name):
    """Build a random graph with embeddings sized for one score function.

    Parameters
    ----------
    n : int
        Side length of the random sparse adjacency matrix (density 0.1)
        the graph is created from.
    func_name : str
        Score function name. 'RotatE', 'RESCAL', 'TransR' and 'TransE'
        get dedicated embedding shapes and/or extra arguments; any other
        name falls back to the defaults.

    Returns
    -------
    tuple
        ``(g, entity_emb, rel_emb, score_args)``. ``score_args`` is a
        tuple for 'TransR', 'RESCAL' and 'RotatE', the bare float ``12.0``
        for 'TransE' and ``None`` for every other name.
    """
    num_rels = 10

    adjacency = sp.sparse.random(n, n, density=0.1, format='coo') != 0
    g = dgl.DGLGraph(adjacency.astype(np.int64), readonly=True)
    num_nodes = g.number_of_nodes()

    # RotatE is the only function given 20-wide entity embeddings.
    entity_emb = F.uniform((num_nodes, 10), F.float32, F.cpu(), 0, 1)
    if func_name == 'RotatE':
        entity_emb = F.uniform((num_nodes, 20), F.float32, F.cpu(), 0, 1)

    # RESCAL replaces the 10-wide relation vector with 10 * 10 values.
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        rel_emb = F.uniform((num_rels, 10 * 10), F.float32, F.cpu(), 0, 1)

    # Node ids are sequential; every edge gets a random relation id.
    g.ndata['id'] = F.arange(0, num_nodes)
    g.edata['id'] = F.tensor(
        np.random.randint(0, num_rels, g.number_of_edges(), dtype=np.int64),
        F.int64)

    # TransR has an additional projection embedding.
    if func_name == 'TransR':
        emb_args = dotdict({'gpu': -1, 'lr': 0.1})
        projection_emb = ExternalEmbedding(emb_args, 10, 10 * 10, F.cpu())
        return g, entity_emb, rel_emb, (12.0, projection_emb, 10, 10)

    # Note: the TransE entry is a plain float, not a one-element tuple.
    score_args = {
        'TransE': 12.0,
        'RESCAL': (10, 10),
        'RotatE': (12.0, 1.0),
    }.get(func_name)
    return g, entity_emb, rel_emb, score_args
def generate_rand_emb(func_name, num_entity, num_rels, dim, bcast):
    """Create random entity and relation embeddings for a score function.

    Parameters
    ----------
    func_name : str
        Score function name. 'RotatE' gets relation embeddings of width
        ``dim // 2`` and 'RESCAL' of width ``dim * dim``; every other name
        uses width ``dim``.
    num_entity : int
        Number of entity embedding rows.
    num_rels : int
        Number of relation embedding rows (forced to 1 when ``bcast`` is
        'rel').
    dim : int
        Entity embedding width.
    bcast : str
        Broadcast mode. Only 'rel' has an effect here; 'head' and 'tail'
        leave the generated shapes unchanged.

    Returns
    -------
    tuple
        ``(entity_emb, rel_emb)``, both drawn uniformly from [-1, 1).
    """
    if bcast == 'rel':
        num_rels = 1

    entity_emb = F.uniform((num_entity, dim), F.float32, F.cpu(), -1, 1)

    # The default-width draw is kept even when it is replaced below, so the
    # number of random draws per call stays the same as before.
    rel_emb = F.uniform((num_rels, dim), F.float32, F.cpu(), -1, 1)
    if func_name == 'RotatE':
        rel_emb = F.uniform((num_rels, dim // 2), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        rel_emb = F.uniform((num_rels, dim * dim), F.float32, F.cpu(), -1, 1)

    # Every score function receives the same pair of tensors.
    return entity_emb, rel_emb
def generate_rand_emb(func_name, bcast):
    """Create random head/relation/tail embeddings plus score arguments.

    Parameters
    ----------
    func_name : str
        Score function name; selects the relation embedding width and
        the extra arguments that are returned.
    bcast : str
        One of 'head', 'rel' or 'tail' collapses that embedding table to
        a single row; any other value keeps the default sizes
        (16 heads, 4 relations, 32 tails).

    Returns
    -------
    tuple
        ``(head_emb, rel_emb, tail_emb, score_args)``. ``score_args`` is
        the bare float ``12.0`` for 'TransE', a tuple for 'TransE_l1',
        'TransE_l2', 'RESCAL' and 'RotatE', and ``None`` otherwise.
    """
    dim = 16
    num_head = 1 if bcast == 'head' else 16
    num_rels = 1 if bcast == 'rel' else 4
    num_tail = 1 if bcast == 'tail' else 32

    head_emb = F.uniform((num_head, dim), F.float32, F.cpu(), 0, 1)
    tail_emb = F.uniform((num_tail, dim), F.float32, F.cpu(), 0, 1)

    rel_emb = F.uniform((num_rels, dim), F.float32, F.cpu(), -1, 1)
    if func_name == 'RotatE':
        rel_emb = F.uniform((num_rels, dim // 2), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        rel_emb = F.uniform((num_rels, dim * dim), F.float32, F.cpu(), -1, 1)

    # Note: the plain 'TransE' entry is a float, not a one-element tuple.
    score_args = {
        'TransE': 12.0,
        'TransE_l1': (12.0, 'l1'),
        'TransE_l2': (12.0, 'l2'),
        'RESCAL': (dim, dim),
        'RotatE': (12.0, 1.0),
    }.get(func_name)
    return head_emb, rel_emb, tail_emb, score_args
def generate_rand_graph(n):
    """Build a random graph with random entity and relation embeddings.

    Parameters
    ----------
    n : int
        Side length of the random sparse adjacency matrix (density 0.1)
        the graph is created from.

    Returns
    -------
    tuple
        ``(g, entity_emb, rel_emb)``. Both embedding tables are 10 wide
        and drawn uniformly from [0, 1); ``rel_emb`` has 10 rows.
    """
    num_rels = 10
    emb_range = (0, 1)

    adjacency = sp.sparse.random(n, n, density=0.1, format='coo') != 0
    g = dgl.DGLGraph(adjacency.astype(np.int64), readonly=True)
    num_nodes = g.number_of_nodes()

    entity_emb = F.uniform((num_nodes, 10), F.float32, F.cpu(), *emb_range)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), *emb_range)

    # Node ids are sequential; every edge gets a random relation id.
    g.ndata['id'] = F.arange(0, num_nodes)
    edge_rel_ids = np.random.randint(
        0, num_rels, g.number_of_edges(), dtype=np.int64)
    g.edata['id'] = F.tensor(edge_rel_ids, F.int64)

    return g, entity_emb, rel_emb
def _init_data(self, name, shape, init_type, low, high):
    """Create a float32 CPU tensor and register it in the data store.

    Parameters
    ----------
    name : str
        Key under which the tensor is stored in ``self._data_store``.
    shape : list of int
        The tensor shape.
    init_type : str
        Initialization method, either 'zero' or 'uniform'.
    low : float
        Lower bound for 'uniform' initialization (ignored for 'zero').
    high : float
        Upper bound for 'uniform' initialization (ignored for 'zero').

    Raises
    ------
    RuntimeError
        If ``init_type`` is neither 'uniform' nor 'zero'.
    """
    # Reject unsupported methods before touching the store.
    if init_type not in ('uniform', 'zero'):
        raise RuntimeError('Unknown initial method')

    if init_type == 'uniform':
        tensor = F.uniform(shape=shape,
                           dtype=F.float32,
                           ctx=F.cpu(),
                           low=low,
                           high=high)
    else:
        tensor = F.zeros(shape=shape, dtype=F.float32, ctx=F.cpu())

    self._data_store[name] = tensor
def run_topk_emb(sfunc, sim_func, create_emb_sim=create_kge_emb_sim):
    """Check ``topK`` of an embedding-similarity object against brute force.

    A random 80 x 32 embedding table is generated, and ``topK`` is
    exercised in three modes: pair-wise (``pair_ws=True``), all head/tail
    combinations (the default) and per-row broadcast (``bcast=True``, with
    and without explicit ids). Each result is compared with the top 10
    scores obtained by calling ``sim_func`` on every candidate pair.

    Parameters
    ----------
    sfunc : object
        Similarity function specifier forwarded to ``create_emb_sim``.
    sim_func : callable
        Reference similarity ``sim_func(head_emb, tail_emb)`` used to
        compute the expected scores.
    create_emb_sim : callable
        Factory called as ``create_emb_sim(emb, sfunc)``; must return an
        object exposing ``topK``.

    Raises
    ------
    AssertionError
        If any ``topK`` result disagrees with the reference.
    """
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80
    topk = 10

    emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)
    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head + num_tail)
    sim_infer = create_emb_sim(emb, sfunc)

    def reference_topk(pairs):
        """Score every (head id, tail id) pair; return the best ``topk``."""
        scores, head_ids, tail_ids = [], [], []
        for h_id, t_id in pairs:
            score = sim_func(F.take(emb, h_id, 0), F.take(emb, t_id, 0))
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(h_id))
            tail_ids.append(F.asnumpy(t_id))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        # Descending order by score, truncated to the k best entries.
        best = np.argsort(scores)[::-1][:topk]
        return np.asarray(head_ids)[best], np.asarray(tail_ids)[best], scores[best]

    def check(result, expected):
        """Compare one ``(head, tail, score)`` result with the reference."""
        res_head, res_tail, res_score = result
        exp_head, exp_tail, exp_score = expected
        np.testing.assert_allclose(res_score, exp_score, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(res_head, exp_head)
        np.testing.assert_allclose(res_tail, exp_tail)

    # Pair-wise mode: head[i] is only ever scored against tail[i].
    result1 = sim_infer.topK(head, tail, pair_ws=True)
    expected = reference_topk(
        [(head[i], tail[i]) for i in range(head.shape[0])])
    check(result1[0], expected)
    print('pass pair wise')

    # Default mode: one global top-k over every head/tail combination.
    result1 = sim_infer.topK(head, tail)
    assert len(result1) == 1
    expected = reference_topk(
        [(head[i], tail[j])
         for i in range(head.shape[0])
         for j in range(tail.shape[0])])
    check(result1[0], expected)

    # Broadcast mode: a separate top-k per head; omitting the ids must
    # give the same answer as passing every embedding id explicitly.
    emb_ids = F.arange(0, num_emb)
    result1 = sim_infer.topK(emb_ids, emb_ids, bcast=True)
    result2 = sim_infer.topK(bcast=True)
    assert len(result1) == emb_ids.shape[0]
    assert len(result2) == emb_ids.shape[0]

    for i in range(emb_ids.shape[0]):
        expected = reference_topk(
            [(emb_ids[i], emb_ids[j]) for j in range(emb_ids.shape[0])])
        check(result1[i], expected)
        check(result2[i], expected)
    print('pass all')
def run_topk_emb2(sfunc, sim_func, emb_model):
    """Check ``emb_model.embed_sim`` against a brute-force top-k.

    A random 80 x 32 embedding table is written to a temporary directory
    as both 'entity.npy' and 'relation.npy' and loaded into ``emb_model``.
    ``embed_sim`` is then exercised on the 'entity' embeddings in three
    modes: pair-wise (``pair_ws=True``), all head/tail combinations (the
    default) and per-row broadcast (``bcast=True``, with and without
    explicit ids). Each result is compared with the top 10 scores
    obtained by calling ``sim_func`` on every candidate pair.

    Parameters
    ----------
    sfunc : object
        Similarity function specifier forwarded to ``embed_sim``.
    sim_func : callable
        Reference similarity ``sim_func(head_emb, tail_emb)`` used to
        compute the expected scores.
    emb_model : object
        Model exposing ``load(path)`` and ``embed_sim(...)``.

    Raises
    ------
    AssertionError
        If any ``embed_sim`` result disagrees with the reference.
    """
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80
    topk = 10

    emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)

    def reference_topk(pairs):
        """Score every (head id, tail id) pair; return the best ``topk``."""
        scores, head_ids, tail_ids = [], [], []
        for h_id, t_id in pairs:
            score = sim_func(F.take(emb, h_id, 0), F.take(emb, t_id, 0))
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(h_id))
            tail_ids.append(F.asnumpy(t_id))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])
        # Descending order by score, truncated to the k best entries.
        best = np.argsort(scores)[::-1][:topk]
        return np.asarray(head_ids)[best], np.asarray(tail_ids)[best], scores[best]

    def check(result, expected):
        """Compare one ``(head, tail, score)`` result with the reference."""
        res_head, res_tail, res_score = result
        exp_head, exp_tail, exp_score = expected
        np.testing.assert_allclose(res_score, exp_score, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(res_head, exp_head)
        np.testing.assert_allclose(res_tail, exp_tail)

    # NOTE(review): the flattened source does not show where the original
    # `with` block ended. All `embed_sim` calls are kept inside it so the
    # embedding files exist for as long as the model might read them.
    with tempfile.TemporaryDirectory() as tmpdirname:
        create_emb_file(Path(tmpdirname), 'entity.npy', emb.numpy())
        create_emb_file(Path(tmpdirname), 'relation.npy', emb.numpy())
        emb_model.load(Path(tmpdirname))

        head = F.arange(0, num_head)
        tail = F.arange(num_head, num_head + num_tail)

        # Pair-wise mode: head[i] is only ever scored against tail[i].
        result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc,
                                      pair_ws=True)
        expected = reference_topk(
            [(head[i], tail[i]) for i in range(head.shape[0])])
        check(result1[0], expected)
        print('pass pair wise')

        # Default mode: one global top-k over every head/tail combination.
        result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc)
        assert len(result1) == 1
        expected = reference_topk(
            [(head[i], tail[j])
             for i in range(head.shape[0])
             for j in range(tail.shape[0])])
        check(result1[0], expected)

        # Broadcast mode: a separate top-k per head; omitting the ids must
        # give the same answer as passing every embedding id explicitly.
        emb_ids = F.arange(0, num_emb)
        result1 = emb_model.embed_sim(emb_ids, emb_ids, 'entity',
                                      sfunc=sfunc, bcast=True)
        result2 = emb_model.embed_sim(embed_type='entity', sfunc=sfunc,
                                      bcast=True)
        assert len(result1) == emb_ids.shape[0]
        assert len(result2) == emb_ids.shape[0]

        for i in range(emb_ids.shape[0]):
            expected = reference_topk(
                [(emb_ids[i], emb_ids[j]) for j in range(emb_ids.shape[0])])
            check(result1[i], expected)
            check(result2[i], expected)
        print('pass all')