Exemple #1
0
 def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
     nonlocal memory
     if s.isdigit():
         nb_hops, is_reversed = int(s), False
     else:
         nb_hops, is_reversed = int(s[:-1]), True
     res = None
     if reformulator_type in {'static'}:
         res = StaticReformulator(nb_hops, embedding_size, init_name=ref_init_type,
                                  lower_bound=lower_bound, upper_bound=upper_bound)
     elif reformulator_type in {'linear'}:
         res = LinearReformulator(nb_hops, embedding_size, init_name=ref_init_type,
                                  lower_bound=lower_bound, upper_bound=upper_bound)
     elif reformulator_type in {'attentive'}:
         res = AttentiveReformulator(nb_hops, predicate_embeddings, init_name=ref_init_type,
                                     lower_bound=lower_bound, upper_bound=upper_bound)
     elif reformulator_type in {'memory'}:
         if memory is None:
             memory = MemoryReformulator.Memory(nb_hops, nb_rules, embedding_size, init_name=ref_init_type,
                                                lower_bound=lower_bound, upper_bound=upper_bound)
         res = MemoryReformulator(memory)
     elif reformulator_type in {'ntp'}:
         res = NTPReformulator(nb_hops=nb_hops, embedding_size=embedding_size,
                               kernel=kernel, init_name=ref_init_type,
                               lower_bound=lower_bound, upper_bound=upper_bound)
     assert res is not None
     return res, is_reversed
Exemple #2
0
def test_hoppy_v1():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(16):
        for nb_hops in range(6):
            for use_attention in [True, False]:
                with torch.no_grad():
                    entity_embeddings = nn.Embedding(nb_entities,
                                                     embedding_size * 2,
                                                     sparse=True)
                    predicate_embeddings = nn.Embedding(nb_predicates,
                                                        embedding_size * 2,
                                                        sparse=True)

                    base = ComplEx(entity_embeddings)

                    if use_attention:
                        reformulator = AttentiveReformulator(
                            nb_hops, predicate_embeddings)
                    else:
                        reformulator = LinearReformulator(
                            nb_hops, embedding_size * 2)

                    model = SimpleHoppy(base,
                                        entity_embeddings,
                                        hops=reformulator)

                    xs = torch.LongTensor(rs.randint(nb_entities, size=32))
                    xp = torch.LongTensor(rs.randint(nb_predicates, size=32))
                    xo = torch.LongTensor(rs.randint(nb_entities, size=32))

                    xs_emb = entity_embeddings(xs)
                    xp_emb = predicate_embeddings(xp)
                    xo_emb = entity_embeddings(xo)

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i],
                                                   scores_sp[i, xo[i]],
                                                   rtol=1e-5,
                                                   atol=1e-5)
                        np.testing.assert_allclose(inf[i],
                                                   scores_po[i, xs[i]],
                                                   rtol=1e-5,
                                                   atol=1e-5)
Exemple #3
0
 def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
     nonlocal memory
     if s.isdigit():
         nb_hops, is_reversed = int(s), False
     else:
         nb_hops, is_reversed = int(s[:-1]), True
     res = None
     if reformulator_type in {'static'}:
         res = StaticReformulator(nb_hops, rank)
     elif reformulator_type in {'linear'}:
         res = LinearReformulator(nb_hops, rank)
     elif reformulator_type in {'attentive'}:
         res = AttentiveReformulator(nb_hops, predicate_embeddings)
     elif reformulator_type in {'memory'}:
         memory = MemoryReformulator.Memory(
             nb_hops, nb_rules, rank) if memory is None else memory
         res = MemoryReformulator(memory)
     assert res is not None
     return res, is_reversed
Exemple #4
0
def test_multirhoppy_v1():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    init_size = 1.0

    rs = np.random.RandomState(0)

    for _ in range(8):
        for nb_hops_lst in [[1], [2], [3], [1, 2], [2, 2], [3, 2], [1, 2, 2],
                            [2, 2, 2], [3, 2, 2]]:
            for depth in range(3):
                for use_attention in [True, False]:
                    with torch.no_grad():
                        entity_embeddings = nn.Embedding(nb_entities,
                                                         embedding_size * 2,
                                                         sparse=True)
                        predicate_embeddings = nn.Embedding(nb_predicates,
                                                            embedding_size * 2,
                                                            sparse=True)

                        entity_embeddings.weight.data *= init_size
                        predicate_embeddings.weight.data *= init_size

                        base = ComplEx(entity_embeddings)

                        hops_lst = []
                        for i in nb_hops_lst:
                            if use_attention:
                                reformulator = AttentiveReformulator(
                                    i, predicate_embeddings)
                            else:
                                reformulator = LinearReformulator(
                                    i, embedding_size * 2)
                            hops_lst += [(reformulator, False)]

                        model = Hoppy(model=base,
                                      entity_embeddings=entity_embeddings,
                                      hops_lst=hops_lst,
                                      depth=depth)

                        xs = torch.LongTensor(rs.randint(nb_entities, size=32))
                        xp = torch.LongTensor(
                            rs.randint(nb_predicates, size=32))
                        xo = torch.LongTensor(rs.randint(nb_entities, size=32))

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        scores = model.forward(xp_emb, xs_emb, xo_emb)
                        inf = model.score(xp_emb, xs_emb, xo_emb)

                        scores_sp, scores_po = scores

                        inf = inf.cpu().numpy()
                        scores_sp = scores_sp.cpu().numpy()
                        scores_po = scores_po.cpu().numpy()

                        for i in range(xs.shape[0]):
                            np.testing.assert_allclose(inf[i],
                                                       scores_sp[i, xo[i]],
                                                       rtol=1e-5,
                                                       atol=1e-5)
                            np.testing.assert_allclose(inf[i],
                                                       scores_po[i, xs[i]],
                                                       rtol=1e-5,
                                                       atol=1e-5)
Exemple #5
0
def test_clutrr_v3():
    embedding_size = 20
    batch_size = 8

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(32):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s
                         for (s, _, _) in triples + hops}
                        | {o
                           for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel(slope=None)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size,
                                        sparse=True)

    # _hops = LinearReformulator(2, embedding_size)
    _hops = AttentiveReformulator(2, predicate_embeddings)

    model = NeuralKB(kernel=kernel, scoring_type='concat')
    hoppy = Hoppy(model, hops_lst=[(_hops, False)], depth=1)

    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    for tensor in params:
        print(f'\t{tensor.size()}\t{tensor.device}')

    loss_function = nn.BCELoss()

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(64):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    rs = np.random.RandomState()

    c, d = 0.0, 0.0
    p_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['q']])))

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        nb_positives = len(s_lst)
        nb_negatives = nb_positives * 3

        s_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        nb_negatives = len(s_n_lst)
        o_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        p_n_lst = list(islice(cycle(p_lst), nb_negatives))

        xs_np = np.array([entity_to_index[s] for s in s_lst] + s_n_lst)
        xp_np = np.array([predicate_to_index[p] for p in p_lst + p_n_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst] + o_n_lst)

        xs_emb = entity_embeddings(torch.from_numpy(xs_np))
        xp_emb = predicate_embeddings(torch.from_numpy(xp_np))
        xo_emb = entity_embeddings(torch.from_numpy(xo_np))

        rel_emb = encode_relation(facts=triples,
                                  relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)
        arg1_emb, arg2_emb = encode_arguments(
            facts=triples,
            entity_embeddings=entity_embeddings,
            entity_to_idx=entity_to_index)

        facts = [rel_emb, arg1_emb, arg2_emb]

        scores = hoppy.score(xp_emb,
                             xs_emb,
                             xo_emb,
                             facts=facts,
                             entity_embeddings=entity_embeddings.weight)

        labels_np = np.zeros(xs_np.shape[0])
        labels_np[:nb_positives] = 1
        labels = torch.from_numpy(labels_np).float()

        # for s, p, o, l in zip(xs_np, xp_np, xo_np, labels):
        #     print(s, p, o, l)

        loss = loss_function(scores, labels)

        hop_1_emb = hoppy.hops_lst[0][0].hops_lst[0](xp_emb)
        hop_2_emb = hoppy.hops_lst[0][0].hops_lst[1](xp_emb)

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        print(c, d)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95 and d > 0.95
Exemple #6
0
def test_multi():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    init_size = 1.0

    rs = np.random.RandomState(0)

    for _ in range(16):
        for nb_hops in range(1, 6):
            for use_attention in [True, False]:
                for pt in {'max', 'min', 'sum', 'mixture'}:
                    with torch.no_grad():
                        entity_embeddings = nn.Embedding(nb_entities,
                                                         embedding_size * 2,
                                                         sparse=True)
                        predicate_embeddings = nn.Embedding(nb_predicates,
                                                            embedding_size * 2,
                                                            sparse=True)

                        entity_embeddings.weight.data *= init_size
                        predicate_embeddings.weight.data *= init_size

                        base = ComplEx(entity_embeddings)

                        models = []
                        for i in range(nb_hops):
                            if use_attention:
                                reformulator = AttentiveReformulator(
                                    i, predicate_embeddings)
                            else:
                                reformulator = LinearReformulator(
                                    i, embedding_size * 2)

                            h_model = SimpleHoppy(base,
                                                  entity_embeddings,
                                                  hops=reformulator)
                            models += [h_model]

                        model = Multi(models=models,
                                      pooling_type=pt,
                                      embedding_size=embedding_size * 2)

                        xs = torch.LongTensor(rs.randint(nb_entities, size=32))
                        xp = torch.LongTensor(
                            rs.randint(nb_predicates, size=32))
                        xo = torch.LongTensor(rs.randint(nb_entities, size=32))

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        scores = model.forward(xp_emb, xs_emb, xo_emb)
                        inf = model.score(xp_emb, xs_emb, xo_emb)

                        scores_sp, scores_po = scores

                        inf = inf.cpu().numpy()
                        scores_sp = scores_sp.cpu().numpy()
                        scores_po = scores_po.cpu().numpy()

                        for i in range(xs.shape[0]):
                            np.testing.assert_allclose(inf[i],
                                                       scores_sp[i, xo[i]],
                                                       rtol=1e-5,
                                                       atol=1e-5)
                            np.testing.assert_allclose(inf[i],
                                                       scores_po[i, xs[i]],
                                                       rtol=1e-5,
                                                       atol=1e-5)
Exemple #7
0
def test_learning_v2():
    embedding_size = 100

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({e
                         for (e, _, _) in triples + hops}
                        | {e
                           for (e, _, e) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    fact_rel = torch.LongTensor(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    for s, p, o in hops:
        xs_np = np.array([entity_to_index[s]])
        xp_np = np.array([predicate_to_index[p]])
        xo_np = np.array([entity_to_index[o]])

        with torch.no_grad():
            xs = torch.LongTensor(xs_np)
            xp = torch.LongTensor(xp_np)
            xo = torch.LongTensor(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            inf = hoppy.score(xp_emb, xs_emb, xo_emb)

            inf_np = inf.cpu().numpy()
            assert inf_np < 0.5
Exemple #8
0
def test_learning_v3():
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({e
                         for (e, _, _) in triples + hops}
                        | {e
                           for (e, _, e) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    fact_rel = torch.LongTensor(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(
        torch.LongTensor(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(
        torch.LongTensor(np.array([predicate_to_index['q']])))
    # r_emb = predicate_embeddings(torch.LongTensor(np.array([predicate_to_index['r']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(128):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        xs_np = np.array([entity_to_index[s] for s in s_lst])
        xp_np = np.array([predicate_to_index[p] for p in p_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst])

        xs = torch.LongTensor(xs_np)
        xp = torch.LongTensor(xp_np)
        xo = torch.LongTensor(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)

        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        tmp = hoppy.hops(xp_emb)
        hop_1_emb = tmp[0]
        hop_2_emb = tmp[1]

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95
    assert d > 0.95