def test_find_neighbors():
    """Exercise ``find_nearest_neighbours`` on a small DistMult model.

    Steps, in order: query before the model is fitted, a valid query after
    fitting, ``n_neighbors=30`` against a five-entity subset, and string
    (non-list) values for ``entities_subset`` and ``entities``. Each failing
    call is expected to raise ``AssertionError`` with a specific message.
    """
    model = DistMult(
        batches_count=2,
        seed=555,
        epochs=1,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['e', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
        ['f', 'z', 'g'],
        ['c', 'z', 'g'],
    ])
    candidates = ['a', 'c', 'd', 'e', 'f']

    # Querying before fit() must be rejected.
    with pytest.raises(AssertionError) as err:
        find_nearest_neighbours(
            model, entities=['b'], n_neighbors=3, entities_subset=candidates)
    assert str(err.value) == "KGE model is not fit!"

    model.fit(triples)

    # Valid query: three neighbours of 'b' drawn from the candidate subset.
    neighbors, _distances = find_nearest_neighbours(
        model, entities=['b'], n_neighbors=3, entities_subset=candidates)
    expected = [['e', 'd', 'c']]
    assert np.all(neighbors == expected)

    # Requesting 30 neighbours from a five-entity subset.
    with pytest.raises(AssertionError) as err:
        find_nearest_neighbours(
            model, entities=['b'], n_neighbors=30, entities_subset=candidates)
    assert str(err.value) == (
        "n_neighbors must be less than the number of entities being fit!")

    # entities_subset given as a bare string instead of a list / array.
    with pytest.raises(AssertionError) as err:
        find_nearest_neighbours(
            model, entities=['b'], n_neighbors=3, entities_subset='a')
    assert str(err.value) == (
        "Invalid type for entities_subset! Must be a list or np.array")

    # entities given as a bare string instead of a list / array.
    with pytest.raises(AssertionError) as err:
        find_nearest_neighbours(
            model, entities='b', n_neighbors=3, entities_subset=candidates)
    assert str(err.value) == (
        "Invalid type for entities! Must be a list or np.array")
def test_fit_predict_DistMult():
    """Fit DistMult and compare predictions for a seen and an unseen triple.

    ``('f', 'y', 'e')`` is part of the training array while
    ``('b', 'y', 'd')`` is not; the first prediction must be strictly greater
    than the second.
    """
    hyper_params = dict(
        batches_count=2,
        seed=555,
        epochs=20,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    model = DistMult(**hyper_params)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'y', 'a'],
        ['a', 'y', 'd'],
        ['c', 'y', 'd'],
        ['b', 'y', 'c'],
        ['f', 'y', 'e'],
    ])
    model.fit(train_triples)

    queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])
    # With get_ranks=True predict() returns a pair; only the first item is used.
    y_pred, _ = model.predict(queries, get_ranks=True)
    print(y_pred)

    seen_pred = y_pred[0]
    unseen_pred = y_pred[1]
    assert seen_pred > unseen_pred
def test_is_fitted_on():
    """Check ``is_fitted_on`` against the training data and two variants.

    The variants differ from the training array only in their last triple:
    one introduces an entity (``'g'``) and the other a relation (``'x'``)
    that do not occur in the training array.
    """
    model = DistMult(
        batches_count=2,
        seed=555,
        epochs=1,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )

    # The first four triples are shared by all three arrays.
    shared = [
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
    ]
    X = np.array(shared + [['a', 'z', 'd']])
    model.fit(X)

    with_new_entity = np.array(shared + [['g', 'z', 'd']])
    with_new_relation = np.array(shared + [['a', 'x', 'd']])

    # Fits the train triples
    fitted_on_train = model.is_fitted_on(X)
    assert fitted_on_train is True

    # Doesn't fit the extra entity triples
    fitted_on_entity = model.is_fitted_on(with_new_entity)
    assert fitted_on_entity is False

    # Doesn't fit the extra relationship triples
    fitted_on_relation = model.is_fitted_on(with_new_relation)
    assert fitted_on_relation is False
def test_predict_twice():
    """Call ``predict`` twice on different triples and expect different output.

    Guards against a second ``predict`` call returning the same array as the
    first when the inputs differ.
    """
    settings = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**settings)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(train_triples)

    first_batch = np.array([['a', 'y', 'b'], ['b', 'y', 'a']])
    second_batch = np.array([['a', 'y', 'c'], ['c', 'z', 'a']])

    first_preds = model.predict(first_batch)
    second_preds = model.predict(second_batch)

    same_output = np.array_equal(first_preds, second_preds)
    assert not same_output
def test_lookup_embeddings():
    """Fit DistMult and look up entity embeddings for two entities.

    The original version discarded the result of ``get_embeddings`` and so
    only verified that the call did not raise. The result is now checked to
    contain one row per requested entity and ``k`` columns.
    """
    embedding_size = 10
    model = DistMult(batches_count=2, seed=555, epochs=20, k=embedding_size,
                     loss='pairwise', loss_params={'margin': 5},
                     optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)

    requested = ['a', 'b']
    embeddings = model.get_embeddings(requested, embedding_type='entity')

    # One embedding vector of length k per requested entity.
    assert np.shape(embeddings) == (len(requested), embedding_size)
def test_predict():
    """Compare ``predict`` on labelled triples with ``predict`` on indices.

    The same triples are scored twice: once as given, and once after being
    converted with ``to_idx`` using the model's own entity / relation
    mappings and passed with ``from_idx=True``. Both results must be equal.
    """
    model = DistMult(
        batches_count=2,
        seed=555,
        epochs=1,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(triples)

    preds_from_labels = model.predict(triples)

    indexed_triples = to_idx(triples, model.ent_to_idx, model.rel_to_idx)
    preds_from_indices = model.predict(indexed_triples, from_idx=True)

    np.testing.assert_array_equal(preds_from_labels, preds_from_indices)
"""--- # 3. Training ComplEx model """ import tensorflow print(tensorflow.__version__) from ampligraph.latent_features import DistMult model = DistMult(batches_count=100, seed=0, epochs=200, k=150, eta=5, optimizer='adam', optimizer_params={'lr':1e-3}, loss='multiclass_nll', regularizer='LP', regularizer_params={'p':3, 'lambda':1e-5}, verbose=True) positives_filter = X import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) model.fit(data['train'], early_stopping = False) """--- # 4. Saving and restoring a model
def test_calibrate_with_negatives():
    """Calibrate a fitted DistMult model using explicit negative triples.

    Expectations, in order: ``predict_proba`` raises ``RuntimeError`` before
    any calibration; ``calibrate`` raises ``ValueError`` when called with
    ``positive_base_rate=50``; after a successful ``calibrate`` every
    probability returned for the positives and negatives lies strictly
    between 0 and 1.
    """
    config = dict(
        batches_count=2,
        seed=555,
        epochs=1,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    model = DistMult(**config)

    train_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])
    model.fit(train_triples)

    X_pos = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['d', 'z', 'd'],
    ])
    X_neg = np.array([
        ['a', 'y', 'd'],
        ['d', 'y', 'a'],
        ['c', 'y', 'a'],
        ['a', 'z', 'd'],
    ])

    # Probabilities are requested before the model has been calibrated.
    with pytest.raises(RuntimeError):
        model.predict_proba(X_pos)

    # A positive_base_rate of 50 must be rejected.
    with pytest.raises(ValueError):
        model.calibrate(X_pos, X_neg, positive_base_rate=50,
                        batches_count=2, epochs=10)

    model.calibrate(X_pos, X_neg, batches_count=2, epochs=10)

    all_triples = np.concatenate((X_pos, X_neg))
    probas = model.predict_proba(all_triples)

    above_zero = probas > 0
    below_one = probas < 1
    assert np.logical_and(above_zero, below_one).all()
# Fit & Train model via ampliGraph library log_key = mdl[j]+": "+graph_data[i] log_file = open("eval_log.txt", "a") print("\n\n----"+log_key+"----", file=log_file) print("------------------------------------------------") print("%d) Implementation Model: %s" % (1, mdl[j])) print("------------------------------------------------") start_time = time.time() # START: Training Time Tracker K.clear_session() # Kills current TF comp-graph & creates a new one if (mdl[j] == "ComplEx"): model = ComplEx(verbose=True) elif (mdl[j] == "ConvKB"): model = ConvKB(verbose=True) elif (mdl[j] == "DistMult"): model = DistMult(verbose=True) elif (mdl[j] == "HolE"): model = HolE(verbose=True) elif (mdl[j] == "TransE"): model = TransE(verbose=True) elif (mdl[j] == "RandomBaseline"): model = RandomBaseline(verbose=True) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # TensorFlow will tell you all messages that have the label ERROR model.fit(train_X) # Save model at its best-performance point save_model(model, 'best_ampliGraph_model.pkl') del model # Delete older model # Load recently save best-performance model model = restore_model('./best_ampliGraph_model.pkl') if model.is_fitted: