Beispiel #1
0
def test_evaluate_performance_TransE():
    """Train TransE on the load_wn18() train+valid splits and print ranking metrics.

    The first 200 test triples are ranked by evaluate_performance, with the
    train, valid and test splits passed together as ``filter_triples``. The
    ranks, the MRR and Hits@10 are printed; nothing is asserted.
    """
    dataset = load_wn18()
    training_triples = np.concatenate((dataset['train'], dataset['valid']))

    model = TransE(
        batches_count=10,
        seed=0,
        epochs=100,
        k=100,
        eta=5,
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
        loss='pairwise',
        loss_params={'margin': 5},
    )
    model.fit(training_triples)

    known_triples = np.concatenate((dataset['train'], dataset['valid'], dataset['test']))
    ranks = evaluate_performance(
        dataset['test'][:200],
        model=model,
        filter_triples=known_triples,
        verbose=True,
    )

    # Compute both metrics before printing anything, as the report relies on all of them.
    mean_reciprocal_rank = mrr_score(ranks)
    hits_at_10 = hits_at_n_score(ranks, n=10)

    print("ranks: %s" % ranks)
    print("MRR: %f" % mean_reciprocal_rank)
    print("Hits@10: %f" % hits_at_10)
Beispiel #2
0
def test_fit_predict_transE():
    """Fit TransE on a tiny hand-written graph and compare two predicted scores.

    Asserts that the score predicted for ('f', 'y', 'e'), which is part of the
    training triples, is greater than the score for ('b', 'y', 'd'), which is not.
    """
    training_triples = np.array([
        ['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
        ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
        ['b', 'y', 'c'], ['f', 'y', 'e'],
    ])
    queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])

    model = TransE(batches_count=1, seed=555, epochs=20, k=10,
                   loss='pairwise', loss_params={'margin': 5},
                   optimizer='adagrad', optimizer_params={'lr': 0.1})
    model.fit(training_triples)

    scores = model.predict(queries)
    print(scores)

    seen_score, unseen_score = scores[0], scores[1]
    assert seen_score > unseen_score
Beispiel #3
0
def test_evaluate_performance_default_protocol_without_filter():
    """Check that separate and joint corruption yield the same unfiltered mean rank.

    A TransE model is trained for one epoch on the ``'train'`` split returned by
    load_wn18(). Every 100th test triple is then ranked three times without a
    filter: with ``corrupt_side='o'``, with ``corrupt_side='s'`` and with
    ``corrupt_side='s,o'``. The ranks of the first two runs are pooled, and
    their mean rank must equal the mean rank of the ``'s,o'`` run. The MRR of
    the ``'s,o'`` run must not be infinite.
    """
    # Single function-local import instead of repeating it before every call.
    from ampligraph.evaluation import evaluate_performance, hits_at_n_score, mrr_score, mr_score

    wn18 = load_wn18()
    test_triples = wn18['test'][::100]

    model = TransE(batches_count=10, seed=0, epochs=1,
                   k=50, eta=10, verbose=True,
                   embedding_model_params={'normalize_ent_emb': False, 'norm': 1},
                   loss='self_adversarial', loss_params={'margin': 1, 'alpha': 0.5},
                   optimizer='adam',
                   optimizer_params={'lr': 0.0005})

    model.fit(wn18['train'])

    # Corrupt objects first, then subjects, and pool the resulting ranks.
    ranks_sep = []
    for side in ('o', 's'):
        ranks_sep.extend(evaluate_performance(test_triples, model, verbose=True, corrupt_side=side))

    print('----------EVAL WITHOUT FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MAR:', mr_sep)
    print('Mrr:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))

    ranks = evaluate_performance(test_triples, model, verbose=True, corrupt_side='s,o')
    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MAR:', mr_joint)
    print('Mrr:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))

    np.testing.assert_equal(mr_sep, mr_joint)
    # Compare by value: an identity test against the infinity constant passes for
    # any freshly computed float, and the `np.Inf` alias is gone in NumPy 2.0.
    assert not np.isinf(mrr_joint)
Beispiel #4
0
def train_transe(train_samples: iter):
    """Build a TransE model with fixed hyperparameters and fit it.

    Args:
        train_samples: Training data, handed unchanged to ``model.fit``.

    Returns:
        The TransE instance after ``fit`` was called with
        ``early_stopping=False``.
    """
    hyperparameters = {
        'batches_count': 100,
        'seed': 0,
        'epochs': 200,
        'k': 150,
        'eta': 5,
        'optimizer': 'adam',
        'optimizer_params': {'lr': 1e-3},
        'loss': 'multiclass_nll',
        'regularizer': 'LP',
        'regularizer_params': {'p': 3, 'lambda': 1e-5},
        'verbose': True,
    }
    model = TransE(**hyperparameters)
    model.fit(train_samples, early_stopping=False)
    return model
Beispiel #5
0
def test_fit_predict_TransE_early_stopping_without_filter():
    """Fit TransE with early-stopping settings and print one prediction.

    ``fit`` receives the training split, ``True`` and a settings dict as
    positional arguments; the dict uses every 100th validation triple as
    ``'x_valid'`` and contains no filter entry. The score predicted for the
    first test triple is printed; nothing is asserted.
    """
    dataset = load_wn18()

    early_stopping_settings = {
        'x_valid': dataset['valid'][::100],
        'criteria': 'mrr',
        'stop_interval': 2,
        'burn_in': 1,
        'check_interval': 2,
    }

    model = TransE(batches_count=1, seed=555, epochs=7, k=50,
                   loss='pairwise', loss_params={'margin': 5},
                   verbose=True,
                   optimizer='adagrad', optimizer_params={'lr': 0.1})
    model.fit(dataset['train'], True, early_stopping_settings)

    first_test_triple = dataset['test'][:1]
    print(model.predict(first_test_triple))
Beispiel #6
0
 def perform_test():
     """Fit TransE for one epoch and check the shapes of the trained parameters.

     The first two entries of ``model.trained_model_params`` must have as
     many rows as there are unique entities (subjects and objects) and unique
     relations in the training split, respectively, and ``k`` columns each.
     """
     train = load_wn18rr()['train']
     embedding_size = 5

     subjects_and_objects = np.concatenate([train[:, 0], train[:, 2]], 0)
     n_entities = len(np.unique(subjects_and_objects))
     n_relations = len(np.unique(train[:, 1]))

     model = TransE(batches_count=100, seed=555, epochs=1, k=embedding_size,
                    loss='multiclass_nll', loss_params={'margin': 5},
                    verbose=True,
                    optimizer='sgd', optimizer_params={'lr': 0.001})
     model.fit(train)

     # Row counts: one row per entity, one row per relation.
     assert model.trained_model_params[0].shape[0] == n_entities
     assert model.trained_model_params[1].shape[0] == n_relations
     # Column counts: both tables use the requested embedding size.
     assert model.trained_model_params[0].shape[1] == embedding_size
     assert model.trained_model_params[1].shape[1] == embedding_size
Beispiel #7
0
def test_evaluate_performance_too_many_entities_warning():
    """Check when evaluate_performance emits a UserWarning about the entity count.

    With a model trained on the load_yago3_10() data, a UserWarning is
    required both when no ``entities_subset`` is given and when a
    50,000-entity subset is given. Two further calls (a 10-entity subset, and
    a model trained on the load_wn18rr() data without a subset) are only
    executed; the absence of a warning is not asserted for them.
    """
    yago = load_yago3_10()
    yago_test = yago['test'][::100]
    yago_model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    yago_model.fit(yago['train'])

    # No entity list declared.
    with pytest.warns(UserWarning):
        evaluate_performance(yago_test, yago_model, verbose=True, corrupt_side='o')

    # Entity list as large as the threshold. TOO_MANY_ENT_TH is set to 50,000 entities; the
    # explicit value is used to comply with linting and avoid exporting an unused global variable.
    subjects = np.unique(yago["train"][:, 0])
    objects = np.unique(yago["train"][:, 2])
    large_entity_list = np.union1d(subjects, objects)[:50000]
    with pytest.warns(UserWarning):
        evaluate_performance(yago_test, yago_model, verbose=True, corrupt_side='o',
                             entities_subset=large_entity_list)

    # Small entity list (no exception expected).
    evaluate_performance(yago_test, yago_model, verbose=True, corrupt_side='o',
                         entities_subset=large_entity_list[:10])

    # Smaller dataset, no entity list declared (no exception expected).
    wn18rr = load_wn18rr()
    wn18rr_model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    wn18rr_model.fit(wn18rr['train'])
    evaluate_performance(wn18rr['test'][::100], wn18rr_model, verbose=True, corrupt_side='o')
    model = TransE(verbose=True, k=70, epochs=40)
    """
    model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10,
                    # Use adam optimizer with learning rate 1e-3
                    optimizer='adam', optimizer_params={'lr': 1e-3},
                    # Use pairwise loss with margin 0.5
                    loss='pairwise', loss_params={'margin': 0.5},
                    # Use L2 regularizer with regularizer weight 1e-5
                    regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                    # Enable stdout messages (set to false if you don't want to display)
                    verbose=True)"""

    print("Training...")
    x_orig = load_wn18()
    model.fit(X_train)

    save_model(model, model_name_path=ke_model_path)

    model2 = TransE(verbose=True, k=3, epochs=40)
    model2.fit(X_train)
    save_model(model2, model_name_path=ke_model_path + '2')

    #filter_triples = np.concatenate((X_train, X_valid))
    #filter = np.concatenate((X['train'], X['valid'], X['test']))
    #ranks = evaluate_performance(X['test'],
    #                             model=model,
    #                             filter_triples=filter,
    #                             use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    #                             verbose=True)
Beispiel #9
0
##### RUN ONLY ONCE ######
##################################
# Ampligraph embedding model (train new model)
###################################
# 'normalize_ent_emb' is an embedding-model setting, so it is passed through
# embedding_model_params; it was previously placed in loss_params next to the
# pairwise-loss margin, where it is not an embedding-model option.
model = TransE(batches_count=100,
               seed=555,
               epochs=100,
               k=100,
               loss='pairwise',
               optimizer='sgd',
               loss_params={'margin': 1.0},
               embedding_model_params={'normalize_ent_emb': True},
               verbose=True)
model.fit(kg_triples.as_numpy_array())
# Save the model for later use; it can then be reloaded with
# restore_model(os.path.join(out_dir, 'imdb_transE.pkl')).
save_model(model, os.path.join(out_dir, 'imdb_transE.pkl'))
##################### End

########### ALTERNATIVE #####################
## OR ## Reload a pretrained model
#############################
# Restore models trained using our modified restore  model function
######################
# model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl'))

# Get vectors
# print(model.ent_to_idx.items())
# print(target_entities.get_entities()[:30])
# NOTE(review): this statement looks like two truncated snippets fused together.
# As written it calls the builtin `list` with keyword arguments, which raises
# TypeError at runtime. The keywords look like embedding-model hyperparameters --
# TODO confirm the intended callee and restore the missing lines.
missing = list(
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

import tensorflow as tf

# Only let TensorFlow log messages of severity ERROR through.
tf.logging.set_verbosity(tf.logging.ERROR)

positives_filter = X

# Train on the 'train' split with early stopping switched off.
model.fit(data['train'], early_stopping=False)

"""---
# 4.  Saving and restoring a model
"""

from ampligraph.latent_features import restore_model, save_model

# Write the model to disk, drop the in-memory reference, then load it back.
save_model(model, './best_model.pkm')
del model
model = restore_model('./best_model.pkm')

# Report whether the restored model is flagged as fitted.
if model.is_fitted:
    print('The model is fit!')