コード例 #1
0
ファイル: test_models.py プロジェクト: zdqf/AmpliGraph
def test_predict():
    """Check that ``DistMult.predict`` agrees for label and index inputs.

    A small DistMult model is fitted for one epoch on five string triples.
    The same triples are then scored twice: once as raw labels, and once
    after conversion with ``to_idx`` using the model's own entity/relation
    mappings (passed with ``from_idx=True``). Both score arrays must be
    exactly equal.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(triples)

    # Score the raw label triples first, then the index-encoded version.
    scores_from_labels = model.predict(triples)
    indexed_triples = to_idx(triples, model.ent_to_idx, model.rel_to_idx)
    scores_from_indices = model.predict(indexed_triples, from_idx=True)

    np.testing.assert_array_equal(scores_from_labels, scores_from_indices)
コード例 #2
0
def test_oneton_adapter():
    """Exercise ``OneToNDatasetAdapter`` end to end on a tiny hand-built graph.

    The test walks through, in order:

    * registering mappings and train/test data,
    * generating and setting the output mapping,
    * one-hot outputs for train and test batches (with the output mapping
      taken from the train set, then from the test set),
    * filtered one-hot outputs,
    * ``clear_outputs`` and ``verify_outputs``,
    * batch shapes with and without ``unique_pairs``,
    * subject-corruption batches for a fixed batch size and for
      ``batch_size=-1``.

    Expected one-hot matrices are written out by hand; per the inline
    comments their columns correspond to entities a..f and each row to one
    (subject, predicate) pair.
    """
    from ampligraph.evaluation.protocol import create_mappings, to_idx

    # Train set
    X = np.array([['a', 'p', 'b'], ['a', 'p', 'd'], ['c', 'p', 'd'],
                  ['c', 'p', 'e'], ['c', 'p', 'f']])

    #              a, b, c, d, e, f
    O = np.array([
        [0, 1, 0, 1, 0, 0],  # (a, p)
        [0, 0, 0, 1, 1, 1],  # (c, p)
    ])

    # Test
    T = np.array([['a', 'p', 'c'], ['c', 'p', 'b']])

    # Test set onehots when output mapping is from train set.
    # NOTE: no trailing comma after the closing parenthesis -- a trailing
    # comma would wrap the array in a 1-tuple.
    #               a, b, c, d, e, f
    OT1 = np.array([
        [0, 1, 0, 1, 0, 0],  # (a, p)
        [0, 0, 0, 1, 1, 1],  # (c, p)
    ])
    # Test set onehots when output mapping is from test set.
    #               a, b, c, d, e, f
    OT2 = np.array([
        [0, 0, 1, 0, 0, 0],  # (a, p)
        [0, 1, 0, 0, 0, 0],  # (c, p)
    ])

    # Filter (named filter_triples to avoid shadowing the builtin ``filter``)
    filter_triples = np.concatenate((X, T))
    # Train set onehots when output mapping is from filter.
    #              a, b, c, d, e, f
    OF = np.array([
        [0, 1, 1, 1, 0, 0],  # (a, p)
        [0, 1, 0, 1, 1, 1],  # (c, p)
    ])

    # Expected input tuple to filtered outputs
    OF_map = {(0, 0): [0, 1, 1, 1, 0, 0], (2, 0): [0, 1, 0, 1, 1, 1]}

    rel_to_idx, ent_to_idx = create_mappings(X)
    # From here on X holds the index-encoded train triples.
    X = to_idx(X, ent_to_idx, rel_to_idx)

    adapter = OneToNDatasetAdapter()
    adapter.use_mappings(rel_to_idx, ent_to_idx)
    adapter.set_data(X, 'train', mapped_status=True)

    adapter.set_data(T, 'test', mapped_status=False)

    # Adapter internally maps test set
    assert adapter.mapped_status['test'] == True

    # Generate output map
    train_output_map = adapter.generate_output_mapping('train')

    # Every key of the output map must be an (s, p) pair from the train set
    unique_sp = {(s, p) for s, p in X[:, [0, 1]]}
    for sp in train_output_map.keys():
        assert sp in unique_sp

    # ValueError if generating onehot outputs before output_mapping is set
    with pytest.raises(ValueError):
        adapter.generate_outputs('train')

    adapter.set_output_mapping(train_output_map)
    adapter.generate_outputs('train')
    train_iter = adapter.get_next_batch(batches_count=1,
                                        dataset_type='train',
                                        use_filter=False)
    triples, onehot = next(train_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(O == onehot)

    test_iter = adapter.get_next_batch(batches_count=1,
                                       dataset_type='test',
                                       use_filter=False)

    triples, onehot = next(test_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OT1 == onehot)

    # Generate test output map
    test_output_map = adapter.generate_output_mapping('test')
    adapter.set_output_mapping(test_output_map)

    test_iter = adapter.get_next_batch(batches_count=1,
                                       dataset_type='test',
                                       use_filter=False)

    triples, onehot = next(test_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OT2 == onehot)

    # Train onehot outputs with filter=True
    adapter.set_filter(filter_triples=filter_triples)
    train_iter = adapter.get_next_batch(batches_count=1,
                                        dataset_type='train',
                                        use_filter=True)
    triples, onehot = next(train_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OF == onehot)

    # Test adapter clear_outputs
    assert len(adapter.filtered_status) > 0
    adapter.clear_outputs()
    assert len(adapter.filtered_status) == 0

    # Test verify_outputs: only the exact (use_filter, unique_pairs)
    # combination used for generation should verify.
    adapter.clear_outputs()
    adapter.generate_outputs('train', use_filter=False, unique_pairs=False)
    assert adapter.verify_outputs('train',
                                  use_filter=False,
                                  unique_pairs=False) == True
    assert adapter.verify_outputs('train', use_filter=True,
                                  unique_pairs=True) == False
    assert adapter.verify_outputs('train', use_filter=True,
                                  unique_pairs=False) == False
    assert adapter.verify_outputs('train', use_filter=False,
                                  unique_pairs=True) == False

    adapter.clear_outputs()
    adapter.generate_outputs('train', use_filter=True, unique_pairs=True)
    assert adapter.verify_outputs('train',
                                  use_filter=False,
                                  unique_pairs=False) == False
    assert adapter.verify_outputs('train', use_filter=True,
                                  unique_pairs=True) == True
    assert adapter.verify_outputs('train', use_filter=True,
                                  unique_pairs=False) == False
    assert adapter.verify_outputs('train', use_filter=False,
                                  unique_pairs=True) == False

    # Test batch output shapes
    adapter.clear_outputs()
    train_iter = adapter.get_next_batch(batches_count=1,
                                        dataset_type='train',
                                        use_filter=True,
                                        unique_pairs=True)
    # get_next_batch yields (triples, onehot), as in the checks above.
    triples, onehot = next(train_iter)
    assert triples.shape[0] == 2  # two unique (s, p) pairs in the train set
    assert onehot.shape[0] == 2

    adapter.clear_outputs()
    train_iter = adapter.get_next_batch(batches_count=1,
                                        dataset_type='train',
                                        use_filter=True,
                                        unique_pairs=False)
    triples, onehot = next(train_iter)
    assert triples.shape[0] == 5  # one row per train triple
    assert onehot.shape[0] == 5

    # Test batch subject corruptions
    batch_size = 3
    batch_iter = adapter.get_next_batch_subject_corruptions(
        batch_size=batch_size, dataset_type='train', use_filter=True)
    triples, out, out_onehot = next(batch_iter)

    # only 1 relation in X, so triples should be the same (ignores batch_size)
    assert np.all(X == triples)
    assert triples.shape[1] == 3  # triples should be triples!
    assert out.shape[1] == 3  # batch out should also be triples!
    # batch dimension of out should equal batch_size
    assert out.shape[0] == batch_size
    # Onehot batch_dimension should equal batch size
    assert out_onehot.shape[0] == batch_size
    # .. and the onehot rows should line up one-to-one with the rows of out
    assert out_onehot.shape[0] == out.shape[0]
    assert out_onehot.shape[1] == len(adapter.ent_to_idx)

    adapter.clear_outputs()
    batch_iter = adapter.get_next_batch_subject_corruptions(
        batch_size=-1, dataset_type='train', use_filter=True)
    triples, out, out_onehot = next(batch_iter)

    # only 1 relation in X, so triples should be the same
    assert np.all(X == triples)
    assert triples.shape[1] == 3  # triples should be triples!
    assert out.shape[1] == 3  # batch out should also be triples!
    # batch_size=-1 (entire set), so the batch_dim should equal ents
    assert out.shape[0] == len(adapter.ent_to_idx)
    # Onehot should be a square matrix
    assert out_onehot.shape[0] == out_onehot.shape[1]
    # .. and should be same size as number of unique entities
    assert out_onehot.shape[0] == out.shape[0]
    assert out_onehot.shape[1] == len(adapter.ent_to_idx)

    # Verify that onehot outputs are as expected in the out array
    # (or, if the pair is not present in OF_map, that it is a zero vector)
    for idx, (s, p, o) in enumerate(out):
        if (s, p) in OF_map:
            onehot = OF_map[(s, p)]
            assert np.all(onehot == out_onehot[idx])
        else:
            assert np.all(out_onehot[idx] == 0)