def test_hoppy_v1():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(16):
        for nb_hops in range(6):
            for use_attention in [True, False]:
                with torch.no_grad():
                    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
                    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

                    base = ComplEx(entity_embeddings)

                    if use_attention:
                        reformulator = AttentiveReformulator(nb_hops, predicate_embeddings)
                    else:
                        reformulator = LinearReformulator(nb_hops, embedding_size * 2)

                    model = SimpleHoppy(base, entity_embeddings, hops=reformulator)

                    xs = torch.from_numpy(rs.randint(nb_entities, size=32))
                    xp = torch.from_numpy(rs.randint(nb_predicates, size=32))
                    xo = torch.from_numpy(rs.randint(nb_entities, size=32))

                    xs_emb = entity_embeddings(xs)
                    xp_emb = predicate_embeddings(xp)
                    xo_emb = entity_embeddings(xo)

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-3, atol=1e-3)
                        np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-3, atol=1e-3)
def test_multi():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    init_size = 1.0

    rs = np.random.RandomState(0)

    for _ in range(8):
        for nb_hops in range(1, 6):
            for use_attention in [True, False]:
                for pt in {'max', 'min', 'sum', 'mixture'}:
                    with torch.no_grad():
                        entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
                        predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

                        entity_embeddings.weight.data *= init_size
                        predicate_embeddings.weight.data *= init_size

                        base = ComplEx(entity_embeddings)

                        models = []
                        for i in range(nb_hops):
                            if use_attention:
                                reformulator = AttentiveReformulator(i, predicate_embeddings)
                            else:
                                reformulator = LinearReformulator(i, embedding_size * 2)

                            h_model = SimpleHoppy(base, entity_embeddings, hops=reformulator)
                            models += [h_model]

                        model = Multi(models=models, pooling_type=pt, embedding_size=embedding_size * 2)

                        xs = torch.from_numpy(rs.randint(nb_entities, size=32))
                        xp = torch.from_numpy(rs.randint(nb_predicates, size=32))
                        xo = torch.from_numpy(rs.randint(nb_entities, size=32))

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        scores = model.forward(xp_emb, xs_emb, xo_emb)
                        inf = model.score(xp_emb, xs_emb, xo_emb)

                        scores_sp, scores_po = scores

                        inf = inf.cpu().numpy()
                        scores_sp = scores_sp.cpu().numpy()
                        scores_po = scores_po.cpu().numpy()

                        for i in range(xs.shape[0]):
                            np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-3, atol=1e-3)
                            np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-3, atol=1e-3)
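# Note: the per-row consistency check repeated across these tests could be
# factored into a small helper along the following lines;
# `_assert_scores_consistent` is a hypothetical name, not part of the
# library under test.
def _assert_scores_consistent(inf, scores_sp, scores_po, xs, xo, rtol=1e-3, atol=1e-3):
    # Each triple's score must match its entry in the (s, p, *) and (*, p, o) score matrices.
    for i in range(xs.shape[0]):
        np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=rtol, atol=atol)
        np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=rtol, atol=atol)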
def _test_reasoning_v5(_st):
    torch.set_num_threads(multiprocessing.cpu_count())

    nb_entities = 10
    nb_predicates = 5
    embedding_size = 20

    rs = np.random.RandomState(0)

    # Chain of facts: a -p-> b -q-> c -r-> d -s-> e
    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('c', 'r', 'd'), ('d', 's', 'e')]
    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}
    predicate_to_index = {'p': 0, 'q': 1, 'r': 2, 's': 3}

    for st in [_st]:
        with torch.no_grad():
            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

            fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
            fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
            fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
            facts = [fact_rel, fact_arg1, fact_arg2]

            model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                             kernel=kernel, facts=facts, scoring_type=st)

            # Reformulate every query relation into the four-hop chain p . q . r . s
            indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q'],
                                                 predicate_to_index['r'], predicate_to_index['s']]))
            reformulator = SymbolicReformulator(predicate_embeddings, indices)

            hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)
            rhoppy = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=1)

            xs_np = rs.randint(nb_entities, size=32)
            xp_np = rs.randint(nb_predicates, size=32)
            xo_np = rs.randint(nb_entities, size=32)

            xs_np[0] = 0
            xp_np[0] = 0
            xo_np[0] = 1

            xs_np[1] = 1
            xp_np[1] = 1
            xo_np[1] = 2

            # Query #2 asks about ('a', 'e'): the reformulated chain connects
            # 'a' to 'e', so its score should be high.
            xs_np[2] = 0
            xp_np[2] = 3
            xo_np[2] = 4

            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            scores = hoppy.forward(xp_emb, xs_emb, xo_emb)
            inf = hoppy.score(xp_emb, xs_emb, xo_emb)

            # At depth 1, the recursive model should coincide with the simple one
            scores_h = rhoppy.depth_r_forward(xp_emb, xs_emb, xo_emb, depth=1)
            inf_h = rhoppy.depth_r_score(xp_emb, xs_emb, xo_emb, depth=1)

            print(inf)
            print(inf_h)

            assert inf[2] > 0.95

            scores_sp, scores_po = scores
            scores_h_sp, scores_h_po = scores_h

            inf = inf.cpu().numpy()
            scores_sp = scores_sp.cpu().numpy()
            scores_po = scores_po.cpu().numpy()

            inf_h = inf_h.cpu().numpy()
            scores_h_sp = scores_h_sp.cpu().numpy()
            scores_h_po = scores_h_po.cpu().numpy()

            np.testing.assert_allclose(inf, inf_h)
            np.testing.assert_allclose(scores_sp, scores_h_sp)
            np.testing.assert_allclose(scores_po, scores_h_po)

            for i in range(xs.shape[0]):
                np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-2, atol=1e-2)
                np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-2, atol=1e-2)
                np.testing.assert_allclose(inf_h[i], scores_h_sp[i, xo[i]], rtol=1e-2, atol=1e-2)
                np.testing.assert_allclose(inf_h[i], scores_h_po[i, xs[i]], rtol=1e-2, atol=1e-2)
def test_masking_v2():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(1):
        for position in [0, 1, 2]:
            for st in ['min', 'concat']:
                with torch.no_grad():
                    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('a', 'p', 'c')]
                    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
                    predicate_to_index = {'p': 0, 'q': 1}

                    kernel = GaussianKernel()

                    entity_emb = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
                    predicate_emb = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

                    fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
                    fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
                    fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
                    facts = [fact_rel, fact_arg1, fact_arg2]

                    base = NeuralKB(entity_embeddings=entity_emb, predicate_embeddings=predicate_emb,
                                    kernel=kernel, facts=facts, scoring_type=st)

                    indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q']]))
                    reformulator = SymbolicReformulator(predicate_emb, indices)
                    model = SimpleHoppy(base, entity_emb, hops=reformulator)

                    xs_np = rs.randint(nb_entities, size=32)
                    xp_np = rs.randint(nb_predicates, size=32)
                    xo_np = rs.randint(nb_entities, size=32)
                    xi_np = np.array([position] * xs_np.shape[0])

                    xs_np[0] = 0
                    xp_np[0] = 0
                    xo_np[0] = 1

                    xs_np[1] = 1
                    xp_np[1] = 1
                    xo_np[1] = 2

                    xs_np[2] = 0
                    xp_np[2] = 0
                    xo_np[2] = 2

                    xs = torch.from_numpy(xs_np)
                    xp = torch.from_numpy(xp_np)
                    xo = torch.from_numpy(xo_np)
                    xi = torch.from_numpy(xi_np)

                    xs_emb = entity_emb(xs)
                    xp_emb = predicate_emb(xp)
                    xo_emb = entity_emb(xo)

                    # xi = None
                    base.mask_indices = xi

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    # Query #2 is ('a', 'p', 'c'): proving it via the two-hop
                    # chain p . q needs the facts ('a', 'p', 'b') and
                    # ('b', 'q', 'c'). Masking either of those (positions 0, 1)
                    # should drop the score; masking ('a', 'p', 'c') itself
                    # (position 2) should not, since the chain still proves it.
                    if position in {0, 1}:
                        assert inf[2] < 0.5
                    else:
                        assert inf[2] > 0.9

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-2, atol=1e-2)
                        np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-2, atol=1e-2)
def test_learning_v2():
    embedding_size = 100

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

    fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                     kernel=kernel, facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    # Without any training, the two-hop queries should not score highly
    for s, p, o in hops:
        xs_np = np.array([entity_to_index[s]])
        xp_np = np.array([predicate_to_index[p]])
        xo_np = np.array([entity_to_index[o]])

        with torch.no_grad():
            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            inf = hoppy.score(xp_emb, xs_emb, xo_emb)
            inf_np = inf.cpu().numpy()

            assert inf_np < 0.5
def test_learning_v3():
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

    fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                     kernel=kernel, facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    # Train only the reformulator: exclude the entity and predicate embeddings
    params = [p for p in hoppy.parameters()
              if not torch.equal(p, entity_embeddings.weight) and not torch.equal(p, predicate_embeddings.weight)]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['q']])))
    # r_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['r']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(128):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        xs_np = np.array([entity_to_index[s] for s in s_lst])
        xp_np = np.array([predicate_to_index[p] for p in p_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst])

        xs = torch.from_numpy(xs_np)
        xp = torch.from_numpy(xp_np)
        xo = torch.from_numpy(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)

        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        tmp = hoppy.hops(xp_emb)
        hop_1_emb = tmp[0]
        hop_2_emb = tmp[1]

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # After training, reformulating 'r' should recover the hops 'p' then 'q'
    assert c > 0.95
    assert d > 0.95
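# For reference, a minimal sketch of what the `make_batches` helper used in
# test_learning_v3 is assumed to do: produce (start, end) index pairs covering
# `size` items in chunks of `batch_size`. This is an illustrative
# reimplementation under that assumption, not necessarily the library's own.
def _make_batches_sketch(size: int, batch_size: int):
    # e.g. _make_batches_sketch(5, 2) -> [(0, 2), (2, 4), (4, 5)]
    return [(start, min(start + batch_size, size)) for start in range(0, size, batch_size)]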