Exemple #1
0
def test_evaluate_performance_TransE():
    """Smoke-test ``evaluate_performance`` on WN18 with a TransE model.

    Trains on train+valid, ranks the first 200 test triples against the
    standard filter (train+valid+test), and prints MRR and Hits@10.
    """
    X = load_wn18()
    model = TransE(batches_count=10,
                   seed=0,
                   epochs=100,
                   k=100,
                   eta=5,
                   optimizer_params={'lr': 0.1},
                   loss='pairwise',
                   loss_params={'margin': 5},
                   optimizer='adagrad')
    # Fit on train+valid so validation entities are known at evaluation time.
    model.fit(np.concatenate((X['train'], X['valid'])))

    # All known true triples are filtered out when ranking corruptions.
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200],
                                 model=model,
                                 filter_triples=filter_triples,
                                 verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Exemple #2
0
def test_fit_predict_wn18_TransE():
    """Train a small TransE on WN18 and print scores for one test triple."""
    dataset = load_wn18()
    transe = TransE(batches_count=1, seed=555, epochs=5, k=100,
                    loss='pairwise', loss_params={'margin': 5},
                    verbose=True, optimizer='adagrad',
                    optimizer_params={'lr': 0.1})
    transe.fit(dataset['train'])
    scores, _ = transe.predict(dataset['test'][:1], get_ranks=True)

    print(scores)
Exemple #3
0
def test_evaluate_performance_default_protocol_with_filter():
    """Filtered evaluation: separate s/o corruption vs the default protocol.

    Both strategies are run on the same WN18 subsample; their mean ranks
    must agree exactly, and the joint MRR must be finite.
    """
    # Hoisted: the original repeated these imports three times in the body.
    from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                       mr_score, mrr_score)

    wn18 = load_wn18()

    # Filter of all known true triples (train + valid + test).
    X_filter = np.concatenate((wn18['train'], wn18['valid'], wn18['test']))

    model = TransE(batches_count=10, seed=0, epochs=1,
                   k=50, eta=10, verbose=True,
                   embedding_model_params={'normalize_ent_emb': False, 'norm': 1},
                   loss='self_adversarial', loss_params={'margin': 1, 'alpha': 0.5},
                   optimizer='adam',
                   optimizer_params={'lr': 0.0005})

    model.fit(wn18['train'])

    # Corrupt object side, then subject side, each without the default
    # protocol, and pool the resulting ranks.
    ranks_sep = []
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='o',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)
    print('----------EVAL WITH FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MAR:', mr_sep)
    print('Mrr:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))

    # Same evaluation under the default protocol (s and o corrupted jointly).
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s+o',
                                 use_default_protocol=True)
    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MAR:', mr_joint)
    print('Mrr:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))

    np.testing.assert_equal(mr_sep, mr_joint)
    # Bug fix: the original asserted `mrr_joint is not np.Inf`, an identity
    # check that is (practically) always True; compare by value instead.
    assert mrr_joint != np.inf
Exemple #4
0
def test_fit_predict_TransE_early_stopping_without_filter():
    """Fit TransE with unfiltered early stopping; print one prediction."""
    data = load_wn18()
    transe = TransE(batches_count=1, seed=555, epochs=7, k=50,
                    loss='pairwise', loss_params={'margin': 5},
                    verbose=True, optimizer='adagrad',
                    optimizer_params={'lr': 0.1})
    # Early-stopping configuration: monitor MRR on a validation subsample.
    es_params = {'x_valid': data['valid'][::100],
                 'criteria': 'mrr',
                 'stop_interval': 2,
                 'burn_in': 1,
                 'check_interval': 2}
    transe.fit(data['train'], True, es_params)

    scores, _ = transe.predict(data['test'][:1], get_ranks=True)
    print(scores)
Exemple #5
0
def test_conve_bce_combo():
    """ConvE requires the 'bce' loss and other models must not use it."""
    # Valid pairings: constructing these must not raise.
    model = ConvE(loss='bce')
    model = TransE(loss='nll')

    # Invalid pairing: TransE with 'bce' must raise ValueError.
    with pytest.raises(ValueError):
        model = TransE(loss='bce')

    # Invalid pairing: ConvE with anything but 'bce' must raise ValueError.
    with pytest.raises(ValueError):
        model = ConvE(loss='nll')
Exemple #6
0
def test_create_tensorboard_visualizations():
    """Check the TensorBoard export path against the current TF API."""
    triples = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                        ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                        ['b', 'y', 'c'], ['f', 'y', 'e']])
    transe = TransE(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 5})
    transe.fit(triples)
    create_tensorboard_visualizations(transe, 'tensorboard_files')
Exemple #7
0
def test_fit_predict_transE():
    """Fit TransE on a toy graph; a seen triple should outscore an unseen one."""
    triples = np.array([['a', 'y', 'b'],
                        ['b', 'y', 'a'],
                        ['a', 'y', 'c'],
                        ['c', 'y', 'a'],
                        ['a', 'y', 'd'],
                        ['c', 'y', 'd'],
                        ['b', 'y', 'c'],
                        ['f', 'y', 'e']])
    transe = TransE(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 5},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    transe.fit(triples)
    scores, _ = transe.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]),
                               get_ranks=True)
    print(scores)
    # ('f','y','e') is in the training set, ('b','y','d') is not.
    assert scores[0] > scores[1]
Exemple #8
0
def train_transe(train_samples: "np.ndarray"):
    """Train a TransE embedding model on the given training triples.

    Parameters
    ----------
    train_samples :
        Training triples as accepted by ``TransE.fit``.
        NOTE(review): the original annotation was the builtin ``iter``
        (a function, not a type); presumably an (n, 3) triple array as in
        the other snippets — confirm against callers.

    Returns
    -------
    The fitted TransE model.
    """
    model = TransE(batches_count=100,
                   seed=0,
                   epochs=200,
                   k=150,
                   eta=5,
                   optimizer='adam',
                   optimizer_params={'lr': 1e-3},
                   loss='multiclass_nll',
                   regularizer='LP',
                   regularizer_params={
                       'p': 3,
                       'lambda': 1e-5
                   },
                   verbose=True)
    model.fit(train_samples, early_stopping=False)
    return model
Exemple #9
0
def test_evaluate_performance_too_many_entities_warning():
    """evaluate_performance should warn only for huge candidate-entity sets."""
    data = load_yago3_10()
    model = TransE(batches_count=200,
                   seed=0,
                   epochs=1,
                   k=5,
                   eta=1,
                   verbose=True)
    model.fit(data['train'])

    # Case 1: no entity list declared -> warning expected.
    with pytest.warns(UserWarning):
        evaluate_performance(data['test'][::100],
                             model,
                             verbose=True,
                             corrupt_side='o')

    # Case 2: an entity list larger than the threshold -> warning expected.
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit
        # value to comply with linting and thus avoiding exporting unused
        # global variable.
        subjects = np.unique(data["train"][:, 0])
        objects = np.unique(data["train"][:, 2])
        entities_subset = np.union1d(subjects, objects)[:50000]
        evaluate_performance(data['test'][::100],
                             model,
                             verbose=True,
                             corrupt_side='o',
                             entities_subset=entities_subset)

    # Case 3: a small entity list -> no exception expected.
    evaluate_performance(data['test'][::100],
                         model,
                         verbose=True,
                         corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # Case 4: a smaller dataset with no entity list -> no exception expected.
    data_wn18rr = load_wn18rr()
    model_small = TransE(batches_count=200,
                         seed=0,
                         epochs=1,
                         k=5,
                         eta=1,
                         verbose=True)
    model_small.fit(data_wn18rr['train'])
    evaluate_performance(data_wn18rr['test'][::100],
                         model_small,
                         verbose=True,
                         corrupt_side='o')
Exemple #10
0
 def perform_test():
     """Fit TransE on WN18RR and sanity-check trained parameter shapes.

     Verifies that the entity and relation embedding matrices have one row
     per unique entity/relation in the training set and k columns each.
     """
     X = load_wn18rr()
     k = 5
     # Entities appearing as subject or object anywhere in the training set.
     unique_entities = np.unique(
         np.concatenate([X['train'][:, 0], X['train'][:, 2]], 0))
     unique_relations = np.unique(X['train'][:, 1])
     model = TransE(batches_count=100,
                    seed=555,
                    epochs=1,
                    k=k,
                    loss='multiclass_nll',
                    loss_params={'margin': 5},
                    verbose=True,
                    optimizer='sgd',
                    optimizer_params={'lr': 0.001})
     model.fit(X['train'])
     # verify ent and rel shapes
     assert (model.trained_model_params[0].shape[0] == len(unique_entities))
     assert (
         model.trained_model_params[1].shape[0] == len(unique_relations))
     # verify k
     assert (model.trained_model_params[0].shape[1] == k)
     assert (model.trained_model_params[1].shape[1] == k)
    embedding_model_params={'norm': DEFAULT_NORM_TRANSE,
                         'normalize_ent_emb': DEFAULT_NORMALIZE_EMBEDDINGS,
                         'negative_corruption_entities': DEFAULT_CORRUPTION_ENTITIES,
                         'corrupt_sides': DEFAULT_CORRUPT_SIDE_TRAIN},
    optimizer=DEFAULT_OPTIM,
    optimizer_params={'lr': DEFAULT_LR},
    loss=DEFAULT_LOSS,
    loss_params={},
    regularizer=DEFAULT_REGULARIZER,
    regularizer_params={},
    initializer=DEFAULT_INITIALIZER,
    initializer_params={'uniform': DEFAULT_XAVIER_IS_UNIFORM},
    verbose=DEFAULT_VERBOSE):
    """

    model = TransE(verbose=True, k=70, epochs=40)
    """
    model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10,
                    # Use adam optimizer with learning rate 1e-3
                    optimizer='adam', optimizer_params={'lr': 1e-3},
                    # Use pairwise loss with margin 0.5
                    loss='pairwise', loss_params={'margin': 0.5},
                    # Use L2 regularizer with regularizer weight 1e-5
                    regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                    # Enable stdout messages (set to false if you don't want to display)
                    verbose=True)"""

    print("Training...")
    x_orig = load_wn18()
    model.fit(X_train)
        if r[0] in known_entities and r[2] in known_entities
    ])
    X_train, X_valid = X['train'], X['valid']
    print('Train set size: ', X_train.shape)
    print('Test set size: ', X_valid.shape)
    ke_kwargs = {"verbose": True, "k": 70, "epochs": 100}

    # ComplEx brings double dimensions because of the twofold nature of complex numbers
    model = ComplEx(**ke_kwargs)
    print("Training...")
    model.fit(X_train)
    save_model(model, model_name_path=ke_model_path)
    # If we don't transpose the multidimensionality of the embeddings to 3D but take just 3-D-embeddings,
    # This can't be with ComplEX because, it will be an even number and 3 is not
    ke_kwargs['k'] = 3
    model2 = TransE(**ke_kwargs)
    model2.fit(X_train)
    save_model(model2, model_name_path=ke_model_path + '2')
else:
    model = restore_model(model_name_path=ke_model_path)
    model2 = restore_model(model_name_path=ke_model_path + '2')
    with open(ke_wnkeys_path, 'rb') as handle:
        tok2id, id2tok = pickle.load(handle)


def find_in_tok2id(w):
    """Print every key of the global ``tok2id`` containing *w* as a substring."""
    for key in tok2id:
        if w in key:
            print(w, key, "it is alphabetically there")

"""---
# 3. Training TransE model
"""

import tensorflow
print(tensorflow.__version__)

from ampligraph.latent_features import TransE

model = TransE(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

positives_filter = X

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model.fit(data['train'], early_stopping = False)

"""---
# 4.  Saving and restoring a model
 print("------------------------------------------------")
 print("%d) Implementation Model: %s" % (1, mdl[j]))
 print("------------------------------------------------")
 start_time = time.time()  # START: Training Time Tracker    
 K.clear_session()  # Kills current TF comp-graph & creates a new one
 
 if (mdl[j] == "ComplEx"):
     model = ComplEx(verbose=True)
 elif (mdl[j] == "ConvKB"):
     model = ConvKB(verbose=True)
 elif (mdl[j] == "DistMult"):
     model = DistMult(verbose=True)
 elif (mdl[j] == "HolE"):
     model = HolE(verbose=True)
 elif (mdl[j] == "TransE"):
     model = TransE(verbose=True)
 elif (mdl[j] == "RandomBaseline"):
     model = RandomBaseline(verbose=True)
 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # TensorFlow will tell you all messages that have the label ERROR
 model.fit(train_X)
 
 # Save model at its best-performance point
 save_model(model, 'best_ampliGraph_model.pkl')
 del model  # Delete older model
 # Load recently save best-performance model
 model = restore_model('./best_ampliGraph_model.pkl')    
 if model.is_fitted:
     print('The model is fit!')
 else:
     print('The model is not fit! Did you skip a step?')
 
Exemple #15
0
    embedding_model_params={'norm': DEFAULT_NORM_TRANSE,
                         'normalize_ent_emb': DEFAULT_NORMALIZE_EMBEDDINGS,
                         'negative_corruption_entities': DEFAULT_CORRUPTION_ENTITIES,
                         'corrupt_sides': DEFAULT_CORRUPT_SIDE_TRAIN},
    optimizer=DEFAULT_OPTIM,
    optimizer_params={'lr': DEFAULT_LR},
    loss=DEFAULT_LOSS,
    loss_params={},
    regularizer=DEFAULT_REGULARIZER,
    regularizer_params={},
    initializer=DEFAULT_INITIALIZER,
    initializer_params={'uniform': DEFAULT_XAVIER_IS_UNIFORM},
    verbose=DEFAULT_VERBOSE):
    """

    model = TransE(verbose=True, k=70, epochs=40)
    """pyt
    model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10,
                    # Use adam optimizer with learning rate 1e-3
                    optimizer='adam', optimizer_params={'lr': 1e-3},
                    # Use pairwise loss with margin 0.5
                    loss='pairwise', loss_params={'margin': 0.5},
                    # Use L2 regularizer with regularizer weight 1e-5
                    regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                    # Enable stdout messages (set to false if you don't want to display)
                    verbose=True)"""

    print("Training...")
    x_orig = load_wn18()
    model.fit(X_train)
    save_model(model, model_name_path=ke_model_path)
Exemple #16
0
# # Load target entities
# NOTE(review): tes, kg_triples and out_dir are not defined in this snippet;
# they presumably come from earlier cells of the original script — confirm.
target_entities = tes.load_from_file(
    '../example_data/imdb/imdb_target_entities')
print(target_entities.get_entities()[:10])

##### RUN ONLY ONCE ######
##################################
# Ampligraph embedding model (train new model)
###################################
# NOTE(review): 'normalize_ent_emb' is passed inside loss_params here, while
# other snippets place it under embedding_model_params — confirm intent.
model = TransE(batches_count=100,
               seed=555,
               epochs=100,
               k=100,
               loss='pairwise',
               optimizer='sgd',
               loss_params={
                   'margin': 1.0,
                   'normalize_ent_emb': True
               },
               verbose=True)
model.fit(kg_triples.as_numpy_array())
# Save model for later usage; then it can be reloaded using
# load_model(os.path.join(experiment_dir, 'model_transE.pkl'))
save_model(model, os.path.join(out_dir, 'imdb_transE.pkl'))
##################### End

########### ALTERNATIVE #####################
## OR ## Reload a pretrained model
#############################
# Restore models trained using our modified restore model function
######################
Exemple #17
0
def select_kge(kge_name, batch_size, epochs, seed, verbose, learning_rate=0.1):
    """Build an (untrained) knowledge-graph-embedding model by name.

    Parameters
    ----------
    kge_name : str
        One of 'complex', 'hole', 'transe'. Any other value aborts the
        program via sys.exit.
    batch_size : int
        Forwarded to the model as ``batches_count``.
    epochs : int
        Number of training epochs.
    seed : int
        Random seed forwarded to the model.
    verbose : bool
        Verbosity flag forwarded to the model.
    learning_rate : float, optional
        Optimizer learning rate used by the 'hole' branch. Bug fix: the
        original referenced an undefined global ``learning_rate`` there,
        raising NameError at runtime; it is now a parameter defaulting to
        0.1, the default mentioned in the original comments.

    Returns
    -------
    The constructed ampligraph model instance (never reached for an
    invalid name — sys.exit is called instead).
    """
    if kge_name == 'complex':
        # ComplEx model.
        # NOTE(review): 'margin' is not an optimizer parameter; the
        # commented-out 'lr' suggests a learning rate was intended here.
        # Left unchanged to preserve behavior — confirm with ampligraph docs.
        model = ComplEx(
            batches_count=batch_size,
            epochs=epochs,
            k=150,
            eta=20,
            optimizer='adam',
            optimizer_params={'margin':
                              5},  #,'lr':learning_rate}, # default lr:0.1
            loss='multiclass_nll',
            loss_params={},
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-4
            },
            seed=seed,
            verbose=verbose)
    elif kge_name == 'hole':
        # HolE model.
        model = HolE(batches_count=batch_size,
                     epochs=epochs,
                     k=100,
                     eta=20,
                     optimizer='adam',
                     optimizer_params={'lr': learning_rate},
                     loss='multiclass_nll',
                     regularizer='LP',
                     regularizer_params={
                         'p': 3,
                         'lambda': 1e-5
                     },
                     seed=seed,
                     verbose=verbose)
    elif kge_name == 'transe':
        # TransE model.
        # NOTE(review): same questionable optimizer_params as 'complex' above.
        model = TransE(
            batches_count=batch_size,
            epochs=epochs,
            k=350,
            eta=20,
            optimizer='adam',
            optimizer_params={'margin':
                              5},  #,'lr':learning_rate}, # default lr:0.1
            loss='multiclass_nll',  #loss='pairwise',
            loss_params={},  #loss_params={'margin:5'},
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-4
            },
            seed=seed,
            verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    return model