Ejemplo n.º 1
0
def test_calibrate_with_negatives():
    """Check probability calibration of a DistMult model using explicit negatives.

    The test expects, in order:
      * ``predict_proba`` on a fitted but uncalibrated model raises
        ``RuntimeError``;
      * ``calibrate`` with ``positive_base_rate=50`` raises ``ValueError``;
      * after a successful ``calibrate`` call, every probability returned for
        the positive and negative triples lies strictly between 0 and 1.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(train_triples)

    positives = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['d', 'z', 'd'],
    ])
    negatives = np.array([
        ['a', 'y', 'd'],
        ['d', 'y', 'a'],
        ['c', 'y', 'a'],
        ['a', 'z', 'd'],
    ])

    # Probabilities must not be available before calibration.
    with pytest.raises(RuntimeError):
        model.predict_proba(positives)

    # NOTE(review): 50 is presumably rejected because the base rate has to be
    # a fraction -- confirm against the calibrate() documentation.
    with pytest.raises(ValueError):
        model.calibrate(positives, negatives,
                        positive_base_rate=50, batches_count=2, epochs=10)

    model.calibrate(positives, negatives, batches_count=2, epochs=10)

    all_triples = np.vstack((positives, negatives))
    calibrated = model.predict_proba(all_triples)

    strictly_inside_unit_interval = (calibrated > 0) & (calibrated < 1)
    assert strictly_inside_unit_interval.all()
Ejemplo n.º 2
0
def test_is_fitted_on():
    """Check ``is_fitted_on`` against the training set and two altered sets.

    The test expects ``True`` for the training triples themselves and
    ``False`` for a set containing an entity ('g') or a relation ('x') that
    does not occur in the training triples.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(train_triples)

    # Same as the training set except for the last row, which uses entity 'g'.
    with_extra_entity = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['g', 'z', 'd'],
    ])
    # Same as the training set except for the last row, which uses relation 'x'.
    with_extra_relation = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'x', 'd'],
    ])

    expectations = (
        (train_triples, True),
        (with_extra_entity, False),
        (with_extra_relation, False),
    )
    for triples, expected in expectations:
        assert model.is_fitted_on(triples) is expected
Ejemplo n.º 3
0
def test_find_neighbors():
    """Exercise ``find_nearest_neighbours`` on a small DistMult model.

    Covers, in order: calling it with an unfitted model, a valid query for
    the neighbours of 'b' within a subset of entities, and three invalid
    argument combinations. Every failure is expected to be an
    ``AssertionError`` carrying a specific message.
    """
    model = DistMult(**{
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    })
    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['e', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
        ['f', 'z', 'g'],
        ['c', 'z', 'g'],
    ])

    def query(**overrides):
        # Baseline arguments shared by every call; individual cases override
        # exactly one of them. A fresh dict/list is built on each call.
        arguments = {
            'entities': ['b'],
            'n_neighbors': 3,
            'entities_subset': ['a', 'c', 'd', 'e', 'f'],
        }
        arguments.update(overrides)
        return find_nearest_neighbours(model, **arguments)

    # Querying before fit() must be refused.
    with pytest.raises(AssertionError) as exc_info:
        query()
    assert str(exc_info.value) == "KGE model is not fit!"

    model.fit(train_triples)

    nearest, _distances = query()
    assert np.all(nearest == [['e', 'd', 'c']])

    invalid_cases = (
        ({'n_neighbors': 30},
         "n_neighbors must be less than the number of entities being fit!"),
        ({'entities_subset': 'a'},
         "Invalid type for entities_subset! Must be a list or np.array"),
        ({'entities': 'b'},
         "Invalid type for entities! Must be a list or np.array"),
    )
    for overrides, expected_message in invalid_cases:
        with pytest.raises(AssertionError) as exc_info:
            query(**overrides)
        assert str(exc_info.value) == expected_message
Ejemplo n.º 4
0
def test_lookup_embeddings():
    """Look up entity embeddings on a fitted DistMult model.

    Fits a model with embedding size ``k`` on a handful of triples, requests
    the embeddings of two entities seen during training and checks that one
    ``k``-dimensional vector is returned per requested entity.
    """
    k = 10
    model = DistMult(batches_count=2, seed=555, epochs=20, k=k,
                     loss='pairwise', loss_params={'margin': 5},
                     optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)

    entities = ['a', 'b']
    embeddings = model.get_embeddings(entities, embedding_type='entity')

    # Previously the result was discarded, so the test only proved that the
    # call did not raise. Pin the documented [n, k] output shape as well.
    assert np.shape(embeddings) == (len(entities), k)
Ejemplo n.º 5
0
def test_fit_predict_DistMult():
    """Check that a triple seen in training scores above an unseen one.

    After fitting, ('f', 'y', 'e') -- present in the training set -- is
    expected to receive a higher score than ('b', 'y', 'd'), which is not.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 20,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'y', 'a'],
        ['a', 'y', 'd'],
        ['c', 'y', 'd'],
        ['b', 'y', 'c'],
        ['f', 'y', 'e'],
    ])
    model.fit(train_triples)

    candidates = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])
    # With get_ranks=True predict() returns a pair; only the scores are used.
    scores, _ranks = model.predict(candidates, get_ranks=True)
    print(scores)

    seen_score = scores[0]
    unseen_score = scores[1]
    assert seen_score > unseen_score
Ejemplo n.º 6
0
def test_predict():
    """Check that predicting from labels and from indices gives equal scores.

    The training triples are scored once as raw labels and once after being
    converted with ``to_idx`` using the model's own entity/relation mappings
    (passing ``from_idx=True``); both results must match exactly.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(triples)

    scores_from_labels = model.predict(triples)

    indexed_triples = to_idx(triples, model.ent_to_idx, model.rel_to_idx)
    scores_from_indices = model.predict(indexed_triples, from_idx=True)

    np.testing.assert_array_equal(scores_from_labels, scores_from_indices)
Ejemplo n.º 7
0
def test_predict_twice():
    """Check that two consecutive predict() calls on different triples differ.

    The same fitted model scores two different pairs of triples in a row and
    the two score arrays are expected not to be equal.
    """
    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(train_triples)

    first_batch = np.array([['a', 'y', 'b'], ['b', 'y', 'a']])
    second_batch = np.array([['a', 'y', 'c'], ['c', 'z', 'a']])

    first_scores = model.predict(first_batch)
    second_scores = model.predict(second_batch)

    scores_identical = np.array_equal(first_scores, second_scores)
    assert not scores_identical
Ejemplo n.º 8
0
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

# Script fragment: fit the model configured above, then save it to disk,
# drop the in-memory object and restore it from the saved file.

# Keep a reference to X under the name positives_filter.
# NOTE(review): X and `data` are defined outside the visible part of this
# file, and positives_filter is not used in the lines shown here.
positives_filter = X

import tensorflow as tf
# Only let TensorFlow log messages of severity ERROR through.
# NOTE(review): tf.logging looks like the TensorFlow 1.x API -- confirm the
# TensorFlow version this script targets.
tf.logging.set_verbosity(tf.logging.ERROR)

# Train on the 'train' split with early stopping switched off.
model.fit(data['train'], early_stopping = False)

"""---
# 4.  Saving and restoring a model
"""

from ampligraph.latent_features import save_model, restore_model

# Persist the trained model to a file in the current directory.
save_model(model, './best_model.pkn')

# Remove the in-memory model so the object used below can only come from
# the file written above.
del model

model = restore_model('./best_model.pkn')

# Report when the restored model still counts as fitted.
if model.is_fitted:
    print('The model is fit!')