Exemple #1
0
def predict_missing_links(train_file_path, evaluation_file_path, model_path,
                          tensorboard_visualizations_path):
    """Load a graph from CSV, train (or restore) a TransE model, and score
    candidate links.

    The trained model is cached at ``model_path``; computed metrics and an
    evaluation summary are printed, and TensorBoard projector files are
    optionally written to ``tensorboard_visualizations_path``.
    """
    graph = load_from_csv('.', train_file_path, sep=',')
    evaluation_samples = load_from_csv('.', evaluation_file_path, sep=',')

    print('Head of the loaded graph: ')
    print(graph[:5])

    train_samples, test_samples = split(graph)
    print(
        f'Divided into train and test subsets with shapes {train_samples.shape} and {test_samples.shape} respectively.'
    )

    # Reuse a previously trained model when one exists on disk.
    if os.path.isfile(model_path):
        model = restore_model(model_path)
    else:
        model = train_transe(train_samples)  # train_complex(train_samples)
        save_model(model, model_path)

    # Report ranking metrics as an aligned two-column table.
    metrics = compute_metrics(model, train_samples, test_samples)
    print(f'{"metric":10s}: {"score":5s}')
    for metric, score in metrics.items():
        print(f'{metric:10s}: {score:<5.2f}')

    scores, ranks = score_samples(model, evaluation_samples, train_samples)
    evaluation_summary = summarize(scores, evaluation_samples, ranks)
    print(evaluation_summary)

    # Optionally emit TensorBoard embedding-projector files.
    if tensorboard_visualizations_path:
        os.makedirs(tensorboard_visualizations_path, exist_ok=True)
        create_tensorboard_visualizations(
            model, tensorboard_visualizations_path)
Exemple #2
0
def test_convkb_save_restore():
    """Round-trip a ConvKB model through save/restore on WN18.

    Predictions on the same test slice must be identical before and after
    the model is persisted to disk and reloaded.
    """
    embedding_params = {
        'num_filters': 16,
        'filter_sizes': [1],
        'dropout': 0.0,
        'is_trainable': True,
    }
    model = ConvKB(batches_count=2, seed=22, epochs=1, k=10, eta=1,
                   embedding_model_params=embedding_params,
                   optimizer='adam', optimizer_params={'lr': 0.001},
                   loss='pairwise', loss_params={}, verbose=True)

    dataset = load_wn18()
    model.fit(dataset['train'])
    before = model.predict(dataset['test'][:10])

    # Persist, drop the in-memory model, and reload from disk.
    save_model(model, 'convkb.tmp')
    del model
    restored = restore_model('convkb.tmp')
    after = restored.predict(dataset['test'][:10])

    assert np.all(before == after)

    os.remove('convkb.tmp')
Exemple #3
0
def test_conve_evaluation_protocol():
    """ConvE predictions on WN18 must survive a save/restore round trip."""
    dataset = load_wn18()

    conv_params = {'conv_filters': 16, 'conv_kernel_size': 3}
    reg_params = {'p': 2, 'lambda': 1e-5}
    model = ConvE(batches_count=200, seed=22, epochs=1, k=10,
                  embedding_model_params=conv_params,
                  optimizer='adam', optimizer_params={'lr': 0.01},
                  loss='bce', loss_params={},
                  regularizer=None, regularizer_params=reg_params,
                  verbose=True, low_memory=True)

    model.fit(dataset['train'])
    before = model.predict(dataset['test'][:5])

    # Persist, drop the in-memory model, and reload from disk.
    save_model(model, 'model.tmp')
    del model
    restored = restore_model('model.tmp')
    after = restored.predict(dataset['test'][:5])

    assert np.all(before == after)

    os.remove('model.tmp')
Exemple #4
0
def kge(triples, kge_name, epochs, batch_size, learning_rate, seed, verbose):
    """Train (or restore from cache) a knowledge-graph-embedding model.

    Parameters
    ----------
    triples : array-like of (s, p, o) triples used for training.
    kge_name : name of the KGE model to build via ``select_kge``.
    epochs, batch_size, seed : hyper-parameters forwarded to ``select_kge``.
    learning_rate : unused here; kept for interface stability.
    verbose : when True, additionally runs a 80/20 hold-out evaluation and
        prints ranking metrics before the final training on all triples.

    Returns
    -------
    The fitted (or restored) model.
    """
    # BUG FIX: previously this line read the global ``parsed_args.kge``,
    # silently ignoring the ``kge_name`` argument passed by the caller.
    # The parameter is now honored. Also dropped a needless f-string.
    kge_model_savepath = './temp/ampligraph.model'

    if not os.path.isfile(kge_model_savepath):
        # Embedding evaluation (only performed in verbose mode).
        if verbose:
            # Train/test split: hold out 20% of the triples.
            t_size = math.ceil(len(triples) * 0.2)
            X_train, X_test = train_test_split_no_unseen(triples,
                                                         test_size=t_size)

            eval_model = select_kge(kge_name, batch_size, epochs, seed,
                                    verbose)

            eval_model.fit(X_train)
            # Filter known positives so corruptions that are actually true
            # triples are not counted as ranking errors.
            filter_triples = np.concatenate((X_train, X_test))
            ranks = evaluate_performance(X_test,
                                         model=eval_model,
                                         filter_triples=filter_triples,
                                         use_default_protocol=True,
                                         verbose=True)

            mrr = mrr_score(ranks)
            print("MRR: %.2f" % (mrr))
            mr = mr_score(ranks)
            print("MR: %.2f" % (mr))
            hits_10 = hits_at_n_score(ranks, n=10)
            print("Hits@10: %.2f" % (hits_10))
            hits_3 = hits_at_n_score(ranks, n=3)
            print("Hits@3: %.2f" % (hits_3))
            hits_1 = hits_at_n_score(ranks, n=1)
            print("Hits@1: %.2f" % (hits_1))

            # Reference numbers from the AmpliGraph example, for comparison.
            print('''
            - Ampligraph example -
            MRR: 0.25
            MR: 4927.33
            Hits@10: 0.35
            Hits@3: 0.28
            Hits@1: 0.19
            ''')

        # The final model is trained on ALL triples, then cached to disk.
        model = select_kge(kge_name, batch_size, epochs, seed, verbose)

        print('Training...')
        model.fit(np.array(triples))
        save_model(model, model_name_path=kge_model_savepath)
    else:
        model = restore_model(model_name_path=kge_model_savepath)

    return model
Exemple #5
0
def test_save_and_restore_model():
    """Save/restore round trip for ComplEx, TransE and DistMult.

    For each model class: fit on a toy graph, persist, reload, and check
    that hyper-parameters, index maps, trained parameters, predictions,
    ranks and embeddings are all identical after the restore.
    """
    models = ('ComplEx', 'TransE', 'DistMult')

    for model_name in models:
        module = importlib.import_module("ampligraph.latent_features.models")

        print('Doing save/restore testing for model class: ', model_name)

        class_ = getattr(module, model_name)

        model = class_(batches_count=2,
                       seed=555,
                       epochs=20,
                       k=10,
                       optimizer='adagrad',
                       optimizer_params={'lr': 0.1})

        # Toy knowledge graph of (subject, predicate, object) triples.
        X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                      ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                      ['b', 'y', 'c'], ['f', 'y', 'e']])

        model.fit(X)

        example_name = 'helloworld.pkl'

        save_model(model, model_name_path=example_name)

        loaded_model = restore_model(model_name_path=example_name)

        # FIX: identity comparison with None (was ``loaded_model != None``).
        assert loaded_model is not None
        assert loaded_model.all_params == model.all_params
        assert loaded_model.is_fitted == model.is_fitted
        assert loaded_model.ent_to_idx == model.ent_to_idx
        assert loaded_model.rel_to_idx == model.rel_to_idx

        # Compare trained parameters pairwise (was an index loop over
        # ``range(len(...))``).
        for loaded_params, orig_params in zip(
                loaded_model.trained_model_params,
                model.trained_model_params):
            npt.assert_array_equal(loaded_params, orig_params)

        y_pred_before, _ = model.predict(np.array([['f', 'y', 'e'],
                                                   ['b', 'y', 'd']]),
                                         get_ranks=True)
        y_pred_after, _ = loaded_model.predict(np.array([['f', 'y', 'e'],
                                                         ['b', 'y', 'd']]),
                                               get_ranks=True)
        npt.assert_array_equal(y_pred_after, y_pred_before)

        npt.assert_array_equal(
            loaded_model.get_embeddings(['a', 'b'], embedding_type='entity'),
            model.get_embeddings(['a', 'b'], embedding_type='entity'))

        os.remove(example_name)
Exemple #6
0
def test_conve_fit_predict_save_restore():
    """ConvE predictions on a toy graph must survive save/restore."""
    # Toy knowledge graph plus two unseen triples to score.
    triples = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                        ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                        ['b', 'y', 'c'], ['f', 'y', 'e']])
    test_triples = np.array([['f', 'y', 'a'], ['f', 'y', 'b']])

    conv_params = {'conv_filters': 16, 'conv_kernel_size': 3}
    reg_params = {'p': 2, 'lambda': 1e-5}
    model = ConvE(batches_count=1, seed=22, epochs=1, k=10,
                  embedding_model_params=conv_params,
                  optimizer='adam', optimizer_params={'lr': 0.01},
                  loss='bce', loss_params={},
                  regularizer=None, regularizer_params=reg_params,
                  verbose=True, low_memory=True)

    model.fit(triples)

    before = model.predict(test_triples)
    print(before)

    # Persist, drop the in-memory model, and reload from disk.
    save_model(model, 'model.tmp')
    del model
    restored = restore_model('model.tmp')
    after = restored.predict(test_triples)

    assert np.all(before == after)
    os.remove('model.tmp')
Exemple #7
0
def test_convkb_save_restore():
    """Round-trip a ConvKB model through save/restore on a toy graph.

    NOTE(review): everything after ``os.remove('convkb.tmp')`` looks like an
    unrelated, truncated snippet fused onto this test: it references
    ``X_train`` and ``ke_model_path``, neither of which is defined in this
    scope — confirm against the original source.
    """

    # Toy knowledge graph of (subject, predicate, object) triples, plus two
    # unseen triples to score.
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])

    X_test = np.array([['f', 'y', 'a'], ['f', 'y', 'b']])

    model = ConvKB(batches_count=1,
                   seed=22,
                   epochs=1,
                   k=10,
                   eta=1,
                   embedding_model_params={
                       'num_filters': 16,
                       'filter_sizes': [1],
                       'dropout': 0.0,
                       'is_trainable': True
                   },
                   optimizer='adam',
                   optimizer_params={'lr': 0.001},
                   loss='pairwise',
                   loss_params={},
                   verbose=True)

    model.fit(X)
    y1 = model.predict(X_test)

    # Persist, drop the in-memory model, and reload from disk.
    save_model(model, 'convkb.tmp')
    del model
    model = restore_model('convkb.tmp')

    y2 = model.predict(X_test)

    # Predictions must be identical after the round trip.
    assert np.all(y1 == y2)

    os.remove('convkb.tmp')
    # NOTE(review): the string literal below is dead code (a bare expression
    # statement), presumably a commented-out ComplEx configuration.
    """
    model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10,
                    # Use adam optimizer with learning rate 1e-3
                    optimizer='adam', optimizer_params={'lr': 1e-3},
                    # Use pairwise loss with margin 0.5
                    loss='pairwise', loss_params={'margin': 0.5},
                    # Use L2 regularizer with regularizer weight 1e-5
                    regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                    # Enable stdout messages (set to false if you don't want to display)
                    verbose=True)"""

    print("Training...")
    x_orig = load_wn18()
    # NOTE(review): ``X_train`` and ``ke_model_path`` are undefined here —
    # this would raise NameError if executed; verify the original source.
    model.fit(X_train)

    save_model(model, model_name_path=ke_model_path)

    # A second, smaller TransE model saved alongside the first.
    model2 = TransE(verbose=True, k=3, epochs=40)
    model2.fit(X_train)
    save_model(model2, model_name_path=ke_model_path + '2')

    #filter_triples = np.concatenate((X_train, X_valid))
    #filter = np.concatenate((X['train'], X['valid'], X['test']))
    #ranks = evaluate_performance(X['test'],
    #                             model=model,
    #                             filter_triples=filter,
    #                             use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    #                             verbose=True)

    #mrr = mrr_score(ranks)
    #hits_10 = hits_at_n_score(ranks, n=10)
Exemple #9
0
                    k=1,
                    eta=20,
                    optimizer='adam',
                    optimizer_params={'lr': 1e-3},
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={
                        'p': 3,
                        'lambda': 1e-5
                    },
                    seed=0,
                    verbose=True)

    print("Training...")
    model.fit(X_train)
    save_model(model, model_name_path=ke_model_path)

    filter_triples = np.concatenate((X_train, X_valid))
else:
    model = restore_model(model_name_path=ke_model_path)

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from incf.countryutils import transformations

print("Extracting Embeddings..")

id_to_name_map = {
    **dict(zip(df.home_team_id, df.home_team)),
Exemple #10
0
# Ampligraph embedding model (train new model)
###################################
# Train a TransE embedding model on the full triple set.
# NOTE(review): ``kg_triples`` and ``out_dir`` are defined elsewhere in the
# original script — they are not visible in this excerpt.
model = TransE(batches_count=100,
               seed=555,
               epochs=100,
               k=100,
               loss='pairwise',
               optimizer='sgd',
               loss_params={
                   'margin': 1.0,
                   'normalize_ent_emb': True
               },
               verbose=True)
model.fit(kg_triples.as_numpy_array())
# Save model for later usage; it can then be reloaded using load_model(os.path.join(experiment_dir,'model_transE.pkl'))
save_model(model, os.path.join(out_dir, 'imdb_transE.pkl'))
##################### End

########### ALTERNATIVE #####################
## OR ## Reload a pretrained model
#############################
# Restore models trained using our modified restore-model function
######################
# model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl'))

# Get vectors
# print(model.ent_to_idx.items())
# print(target_entities.get_entities()[:30])
# Entities requested downstream for which the trained model has no
# embedding (absent from ``model.ent_to_idx``).
missing = list(
    filter(lambda e: e not in model.ent_to_idx,
           target_entities.get_entities()))
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={
                        'p': 3,
                        'lambda': 1e-5
                    },
                    verbose=True)

    positives_filter = X
    tf.logging.set_verbosity(tf.logging.ERROR)

    print("Model training started...")
    model.fit(X_train, early_stopping=False)

    print("Save the model...")
    save_model(model, model_name_path=out_embeddings_file)

    print("Evaluating the model...")
    ranks = evaluate_performance(X_test,
                                 model=model,
                                 filter_triples=positives_filter,
                                 use_default_protocol=True,
                                 verbose=True)
    mrr = mrr_score(ranks)
    print("MRR: %.2f" % (mrr))

    hits_10 = hits_at_n_score(ranks, n=10)
    print("Hits@10: %.2f" % (hits_10))
    hits_3 = hits_at_n_score(ranks, n=3)
    print("Hits@3: %.2f" % (hits_3))
    hits_1 = hits_at_n_score(ranks, n=1)
Exemple #12
0
                epochs=400,           #Numero de iteraciones
                k=100,                #Dimensionalidad del grafo
                eta=20,
                optimizer='adam',
                optimizer_params={'lr': 1e-4},
                loss='multiclass_nll',
                regularizer='LP',
                regularizer_params={'p': 3, 'lambda': 1e-5},
                seed=0,
                verbose=True)


# Silence TensorFlow logging below ERROR level during training.
tf.logging.set_verbosity(tf.logging.ERROR)
model.fit(X_train)

save_model(model, model_name_path="../Data/KGEmbedModel.pkl")

# Evaluate the model.
# Filter known positives so corruptions that are actually true triples are
# not penalised during ranking.
filter_triples = np.concatenate((X_train, X_valid))
ranks = evaluate_performance(X_valid, model=model, filter_triples=filter_triples, use_default_protocol=True, verbose=True, filter_unseen=True)

mr = mr_score(ranks)
mrr = mrr_score(ranks)

print("MRR: %.2f" % (mrr))
print("MR: %.2f" % (mr))

# Hits@N: fraction of test triples ranked within the top N.
hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))