Beispiel #1
0
def test_evaluate_performance():
    """Smoke-test filtered ranking evaluation of a ComplEx model on WN18.

    Trains on train+valid, ranks the first 200 test triples with filtered
    corruptions, and prints MRR / Hits@10.

    Fix: the filter variable was named ``filter``, shadowing the builtin;
    renamed to ``filter_triples`` (local rename only, no behavior change).
    """
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=10,
                    k=150,
                    eta=10,
                    loss='pairwise',
                    loss_params={'margin': 5},
                    regularizer=None,
                    optimizer='adagrad',
                    optimizer_params={'lr': 0.1},
                    verbose=True)
    model.fit(np.concatenate((X['train'], X['valid'])))

    # Filter contains every known positive so corruptions that happen to be
    # true triples are not counted against the model.
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200],
                                 model=model,
                                 filter_triples=filter_triples,
                                 verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Beispiel #2
0
def test_evaluate_with_ent_subset_large_graph():
    """Large-graph mode: ranks against a 100-entity subset never exceed 101."""
    set_entity_threshold(1)  # threshold of 1 forces large-graph training mode
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=2, k=10, eta=1,
                optimizer='sgd', optimizer_params={'lr': 1e-5},
                loss='pairwise', loss_params={'margin': 0.5},
                regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                verbose=True)

    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    # Every node appearing as subject or object anywhere in the graph.
    all_nodes = set(X_filter[:, 0]) | set(X_filter[:, 2])

    entities_subset = np.random.choice(list(all_nodes), 100, replace=False)

    ranks = evaluate_performance(X['test'][::10],
                             model=model,
                             filter_triples=X_filter,
                             corrupt_side='o',
                             use_default_protocol=False,
                             entities_subset=list(entities_subset),
                             verbose=True)
    # Worst possible rank is subset size + 1 (the true entity itself).
    assert np.sum(ranks > (100 + 1)) == 0, "No ranks must be greater than 101"
    reset_entity_threshold()
Beispiel #3
0
def generate_model(X):
    """Train a ComplEx model on X (minus a 100-triple holdout), save it to
    './best_model.pkl', and return the holdout test set."""
    X_train, X_test = train_test_split_no_unseen(X, test_size=100)

    print('Train set size: ', X_train.shape)
    print('Test set size: ', X_test.shape)

    hyperparams = dict(batches_count=100,
                       seed=0,
                       epochs=10,
                       k=150,
                       eta=5,
                       optimizer='adam',
                       optimizer_params={'lr': 1e-3},
                       loss='multiclass_nll',
                       regularizer='LP',
                       regularizer_params={'p': 3, 'lambda': 1e-5},
                       verbose=True)
    model = ComplEx(**hyperparams)

    # Silence TensorFlow messages below ERROR level during training.
    tf.logging.set_verbosity(tf.logging.ERROR)

    model.fit(X_train, early_stopping=False)

    print("created the model")

    save_model(model, './best_model.pkl')

    return X_test
Beispiel #4
0
def test_evaluate_performance_so_side_corruptions_without_filter():
    """Evaluate with simultaneous subject+object ('s+o') corruptions and
    check the resulting MRR is finite.

    Fix: the original asserted ``mrr is not np.Inf`` — an *identity* check
    that is always True for a freshly computed float, and ``np.Inf`` was
    removed in NumPy 2.0.  Use ``np.isinf`` to actually test finiteness.
    """
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=5,
                    k=200,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][::20],
                                 model,
                                 X_filter,
                                 verbose=True,
                                 use_default_protocol=False,
                                 corrupt_side='s+o')
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
    assert not np.isinf(mrr)
Beispiel #5
0
def test_evaluate_performance_ranking_against_specified_entities():
    """Ranks computed against an explicit entity subset can never exceed its size."""
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10,
                    loss='nll', regularizer=None,
                    optimizer='adam', optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    test_sample = X['test'][::1000]
    # Candidate entities: subjects and objects of the sampled test triples.
    entities_subset = np.concatenate([test_sample[:, 0], test_sample[:, 2]], 0)

    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks = evaluate_performance(test_sample,
                                 model,
                                 X_filter,
                                 verbose=True,
                                 corrupt_side='s+o',
                                 use_default_protocol=True,
                                 entities_subset=entities_subset)
    ranks = ranks.reshape(-1)
    # A rank beyond the subset size would mean corruptions leaked in from
    # outside the requested entity set.
    assert np.sum(ranks > len(entities_subset)) == 0
Beispiel #6
0
def test_evaluate_performance_ranking_against_shuffled_all_entities():
    """ Compares mrr of test set by using default protocol against all entities vs 
        mrr of corruptions generated by corrupting using entities_subset = all entities shuffled

    Fix: ``random.shuffle`` shuffles *in place* and returns ``None``, so the
    original code assigned ``entities_subset = None`` and silently compared
    the default protocol against itself.  Shuffle first, then pass the list.
    """
    import random
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=1,
                    k=20,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    entities_subset = list(model.ent_to_idx.keys())
    random.shuffle(entities_subset)  # in-place; random.shuffle returns None

    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks_all = evaluate_performance(X['test'][::1000],
                                     model,
                                     X_filter,
                                     verbose=True,
                                     corrupt_side='s,o')

    ranks_shuffled_ent = evaluate_performance(X['test'][::1000],
                                              model,
                                              X_filter,
                                              verbose=True,
                                              corrupt_side='s,o',
                                              entities_subset=entities_subset)
    # Ranking against a shuffled copy of *all* entities must give the same
    # MRR as ranking against all entities implicitly.
    assert (mrr_score(ranks_all) == mrr_score(ranks_shuffled_ent))
Beispiel #7
0
def test_find_clusters():
    """find_clusters works in triple/entity/relation modes and rejects bad input."""
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'x', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model = ComplEx(k=2, batches_count=2)
    model.fit(X)
    clusterer = DBSCAN(eps=1e-3, min_samples=1)

    # With eps this small, every item lands in its own singleton cluster.
    labels = find_clusters(X, model, clusterer, mode='triple')
    assert np.array_equal(labels, np.arange(8))

    labels = find_clusters(np.unique(X[:, 0]), model, clusterer, mode='entity')
    assert np.array_equal(labels, np.arange(4))

    labels = find_clusters(np.unique(X[:, 1]), model, clusterer, mode='relation')
    assert np.array_equal(labels, np.arange(2))

    labels = find_clusters(np.unique(X[:, 2]), model, clusterer, mode='entity')
    assert np.array_equal(labels, np.arange(5))

    # Unknown mode, or input whose shape does not match the mode, must raise.
    for data, mode in ((X, 'hah'),
                       (X, 'entity'),
                       (X, 'relation'),
                       (np.unique(X[:, 0]), 'triple')):
        with pytest.raises(ValueError):
            find_clusters(data, model, clusterer, mode=mode)
Beispiel #8
0
def test_fit_predict_wn18_ComplEx():
    """Fit ComplEx on WN18 and print score + rank for one test triple."""
    X = load_wn18()
    hyperparams = dict(batches_count=1, seed=555, epochs=5, k=100,
                       loss='pairwise', loss_params={'margin': 1},
                       regularizer='LP',
                       regularizer_params={'lambda': 0.1, 'p': 2},
                       optimizer='adagrad', optimizer_params={'lr': 0.1})
    model = ComplEx(**hyperparams)
    model.fit(X['train'])
    y = model.predict(X['test'][:1], get_ranks=True)
    print(y)
Beispiel #9
0
def test_find_duplicates():
    """find_duplicates works in triple/entity/relation modes and rejects bad input."""
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'x', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    model = ComplEx(k=2, batches_count=2)
    model.fit(X)

    entities = set('a b c d e f'.split())
    relations = set('x y'.split())

    def _check(tol, dups, ent_rel, subspace):
        # Tolerance must be positive, duplicate groups cannot outnumber the
        # items, and every group must be drawn from the expected subspace.
        assert tol > 0.0
        assert len(dups) <= len(ent_rel)
        assert all(len(d) <= len(ent_rel) for d in dups)
        assert all(d.issubset(subspace) for d in dups)

    triple_space = {tuple(x) for x in X}

    dups, tol = find_duplicates(X,
                                model,
                                mode='triple',
                                tolerance='auto',
                                expected_fraction_duplicates=0.5)
    _check(tol, dups, X, triple_space)

    # An explicit tolerance must be echoed back unchanged.
    dups, tol = find_duplicates(X, model, mode='triple', tolerance=1.0)
    assert tol == 1.0
    _check(tol, dups, X, triple_space)

    # Entity mode on subjects, entity mode on objects, relation mode.
    for col, mode, space in ((0, 'entity', entities),
                             (2, 'entity', entities),
                             (1, 'relation', relations)):
        dups, tol = find_duplicates(np.unique(X[:, col]),
                                    model,
                                    mode=mode,
                                    tolerance='auto',
                                    expected_fraction_duplicates=0.5)
        _check(tol, dups, space, space)

    # Unknown mode, or input whose shape does not match the mode, must raise.
    for data, mode in ((X, 'hah'),
                       (X, 'entity'),
                       (X, 'relation'),
                       (np.unique(X[:, 0]), 'triple')):
        with pytest.raises(ValueError):
            find_duplicates(data, model, mode=mode)
Beispiel #10
0
def test_evaluate_performance_filter_without_xtest():
    """Filtered evaluation still yields a positive MRR when the filter
    deliberately omits the test triples themselves."""
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10,
                    loss='nll', regularizer=None, optimizer='adam',
                    optimizer_params={'lr': 0.01}, verbose=True)
    model.fit(X['train'])

    # Note: X['test'] intentionally left out of the filter.
    X_filter = np.concatenate((X['train'], X['valid']))
    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks = evaluate_performance(X['test'][::1000], model, X_filter,
                                 verbose=True, corrupt_side='s,o')
    assert mrr_score(ranks) > 0
Beispiel #11
0
def kge(triples, kge_name, verbose):
    """Train a knowledge-graph-embedding model on `triples` and return it.

    Parameters
    ----------
    triples : array of (s, p, o) triples.
    kge_name : model selector; only 'complex' is supported, any other value
        terminates the process via sys.exit.
    verbose : when truthy, also run a filtered ranking evaluation on the
        held-out split and print MRR/MR/Hits@k next to reference numbers
        from the AmpliGraph examples.
    """
    # Hold out ~20% of the triples, keeping every entity seen in training.
    holdout_size = math.ceil(len(triples) * 0.2)
    X_train, X_test = train_test_split_no_unseen(triples, test_size=holdout_size)

    if kge_name == 'complex':
        model = ComplEx(batches_count=50,
                        epochs=300,
                        k=100,
                        eta=20,
                        optimizer='adam',
                        optimizer_params={'lr': 1e-4},
                        loss='multiclass_nll',
                        regularizer='LP',
                        regularizer_params={'p': 3, 'lambda': 1e-5},
                        seed=0,
                        verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    model.fit(X_train)

    if verbose:
        # Filtered ranking evaluation on the held-out triples.
        filter_triples = np.concatenate((X_train, X_test))
        ranks = evaluate_performance(X_test,
                                     model=model,
                                     filter_triples=filter_triples,
                                     use_default_protocol=True,
                                     verbose=True)

        print("MRR: %.2f" % (mrr_score(ranks)))
        print("MR: %.2f" % (mr_score(ranks)))
        print("Hits@10: %.2f" % (hits_at_n_score(ranks, n=10)))
        print("Hits@3: %.2f" % (hits_at_n_score(ranks, n=3)))
        print("Hits@1: %.2f" % (hits_at_n_score(ranks, n=1)))

        print('''
        - Ampligraph example -
        MRR: 0.25
        MR: 4927.33
        Hits@10: 0.35
        Hits@3: 0.28
        Hits@1: 0.19
        ''')

    return model
Beispiel #12
0
def test_missing_entity_ComplEx():
    """Predicting a triple containing an unseen entity/relation raises ValueError."""
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    model = ComplEx(batches_count=1, seed=555, epochs=2, k=5)
    model.fit(X)
    # Unknown object, unknown relation, unknown subject — all must raise.
    for bad_triple in (['a', 'y', 'zzzzzzzzzzz'],
                       ['a', 'xxxxxxxxxx', 'e'],
                       ['zzzzzzzz', 'y', 'e']):
        with pytest.raises(ValueError):
            model.predict(bad_triple)
Beispiel #13
0
def train_complex(train_samples: iter):
    """Fit a ComplEx model with fixed hyperparameters on the given triples.

    NOTE(review): the ``iter`` annotation is the builtin function, not a
    type; ``collections.abc.Iterable`` is presumably what was meant — kept
    unchanged here to preserve the signature byte-for-byte.
    """
    hyperparams = dict(batches_count=100,
                       seed=0,
                       epochs=200,
                       k=150,
                       eta=5,
                       optimizer='adam',
                       optimizer_params={'lr': 1e-3},
                       loss='multiclass_nll',
                       regularizer='LP',
                       regularizer_params={'p': 3, 'lambda': 1e-5},
                       verbose=True)
    model = ComplEx(**hyperparams)
    model.fit(train_samples, early_stopping=False)
    return model
Beispiel #14
0
def test_fit_predict_CompleEx():
    """A triple seen in training ('f','y','e') should outscore an unseen one."""
    model = ComplEx(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 1},
                    regularizer='LP',
                    regularizer_params={'lambda': 0.1, 'p': 2},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    triples = [['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
               ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
               ['b', 'y', 'c'], ['f', 'y', 'e']]
    model.fit(np.array(triples))
    y_pred, _ = model.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]),
                              get_ranks=True)
    print(y_pred)
    assert y_pred[0] > y_pred[1]
Beispiel #15
0
def test_discover_facts():
    """discover_facts validates fitted-model, strategy and target_rel arguments."""
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    model = ComplEx(batches_count=1, seed=555, epochs=2, k=5)

    # Calling before fit() must fail.
    with pytest.raises(ValueError):
        discover_facts(X, model)

    model.fit(X)

    # Unknown strategy must fail.
    with pytest.raises(ValueError):
        discover_facts(X, model, strategy='error')

    # Valid strategy but unknown target relation must fail.
    with pytest.raises(ValueError):
        discover_facts(X, model, strategy='random_uniform', target_rel='error')
Beispiel #16
0
def test_large_graph_mode_adam():
    """Fit with the adam optimizer in large-graph mode.

    The try/except suggests fit() is expected to possibly raise in this
    configuration — presumably adam is unsupported in large-graph mode
    (TODO confirm); the test only requires that failure is reported, not
    propagated.
    """
    set_entity_threshold(10)  # tiny threshold forces large-graph training mode
    X = load_wn18()
    hyperparams = dict(batches_count=100,
                       seed=555,
                       epochs=1,
                       k=50,
                       loss='multiclass_nll',
                       loss_params={'margin': 5},
                       verbose=True,
                       optimizer='adam',
                       optimizer_params={'lr': 0.001})
    model = ComplEx(**hyperparams)
    try:
        model.fit(X['train'])
    except Exception as e:
        print(str(e))

    reset_entity_threshold()
Beispiel #17
0
def test_large_graph_mode():
    """End-to-end fit / evaluate / predict in large-graph (SGD) training mode."""
    set_entity_threshold(10)  # tiny threshold forces large-graph training mode
    X = load_wn18()
    hyperparams = dict(batches_count=100,
                       seed=555,
                       epochs=1,
                       k=50,
                       loss='multiclass_nll',
                       loss_params={'margin': 5},
                       verbose=True,
                       optimizer='sgd',
                       optimizer_params={'lr': 0.001})
    model = ComplEx(**hyperparams)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']), axis=0)
    evaluate_performance(X['test'][::1000],
                         model,
                         X_filter,
                         verbose=True,
                         corrupt_side='s,o')

    y = model.predict(X['test'][:1])
    print(y)
    reset_entity_threshold()
Beispiel #18
0
def test_retrain():
    """Refitting the same seeded model on the same data must reproduce predictions."""
    model = ComplEx(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 1},
                    regularizer='LP',
                    regularizer_params={'lambda': 0.1, 'p': 2},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])

    # Fit twice from the same seed and compare the predictions.
    predictions = []
    for _ in range(2):
        model.fit(X)
        y_pred, _ = model.predict(queries, get_ranks=True)
        predictions.append(y_pred)

    np.testing.assert_array_equal(predictions[0], predictions[1])
Beispiel #19
0
        ignore_index=True)
    positives_filter = pd.concat(
        [train_X_temp.iloc[:, :], test_X_temp.iloc[:, :]],
        axis='index',
        ignore_index=True)
    train_y, test_y, positives_filter = to_categorical(
        train_y, dtype=np.int32), to_categorical(
            test_y, dtype=np.int32), positives_filter.to_numpy(dtype=np.int32)
    print(
        "Shape of train_y: %s;  Shape of test_y: %s;  Shape of positives_filter: %s"
        % (train_y.shape, test_y.shape, positives_filter.shape))

    # Feature Scaling: Normalize dataset via Generation of Embeddings
    print("\nFeature Scaling: Embeddings Generation")
    embed_dim = 100
    embeds_model = ComplEx(k=embed_dim, verbose=True)
    tf.compat.v1.logging.set_verbosity(
        tf.compat.v1.logging.ERROR
    )  # TensorFlow will tell you all messages that have the label ERROR
    embeds_model.fit(positives_filter)

    embeds_source = embeds_model.get_embeddings(positives_filter[:, 0],
                                                embedding_type='entity')
    embeds_dest = embeds_model.get_embeddings(positives_filter[:, 2],
                                              embedding_type='entity')
    embeds = np.concatenate((embeds_source, embeds_dest), axis=1)

    train_sz = train_X_temp.shape[0]
    train_X, test_X = embeds[:train_sz, :], embeds[train_sz:, :]
    train_X = train_X.reshape(
        train_X.shape[0], 4, embed_dim
Beispiel #20
0
def select_kge(kge_name, batch_size, epochs, seed, verbose):
    """Build (but do not train) a KGE model chosen by name.

    Supported names: 'complex', 'hole', 'transe'; any other value aborts the
    process via sys.exit.  Embedding size k, eta and regularizer settings
    are hard-coded per model.

    NOTE(review): `learning_rate` (used in the HolE branch) is not defined
    in this function — it must come from an enclosing/global scope; verify
    it exists at call time.
    """
    model = ''
    # Select kge_name
    if kge_name == 'complex':
        # ComplEx model
        # NOTE(review): 'margin' inside optimizer_params looks misplaced —
        # margin is normally a *loss* parameter; confirm this is intended.
        model = ComplEx(
            batches_count=batch_size,
            epochs=epochs,
            k=150,
            eta=20,
            optimizer='adam',
            optimizer_params={'margin':
                              5},  #,'lr':learning_rate}, # default lr:0.1
            loss='multiclass_nll',
            loss_params={},
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-4
            },
            seed=seed,
            verbose=verbose)
    elif kge_name == 'hole':
        # HolE model
        model = HolE(batches_count=batch_size,
                     epochs=epochs,
                     k=100,
                     eta=20,
                     optimizer='adam',
                     optimizer_params={'lr': learning_rate},
                     loss='multiclass_nll',
                     regularizer='LP',
                     regularizer_params={
                         'p': 3,
                         'lambda': 1e-5
                     },
                     seed=seed,
                     verbose=verbose)
    elif kge_name == 'transe':
        # TransE model
        # NOTE(review): same 'margin'-in-optimizer_params oddity as above.
        model = TransE(
            batches_count=batch_size,
            epochs=epochs,
            k=350,
            eta=20,
            optimizer='adam',
            optimizer_params={'margin':
                              5},  #,'lr':learning_rate}, # default lr:0.1
            loss='multiclass_nll',  #loss='pairwise',
            loss_params={},  #loss_params={'margin:5'},
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-4
            },
            seed=seed,
            verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    return model
  # Client and the SPARQL Endpoint
  
  endpoint = 'http://10.161.202.101:8890/sparql/'
  port = 8890
  output_format = HttpClientDataFormat.PANDAS_DF
  client = HttpClient(endpoint_url=endpoint, port=port, return_format=output_format, timeout=timeout,
                      default_graph_uri=default_graph_url, max_rows=max_rows)
  
  # Get all triples where the object is a URI
  dataset = graph.feature_domain_range(s, p, o).filter({o: ['isURI']})
  
  # execute 
  df = dataset.execute(client, return_format=output_format)
    
  # Train/test split and create ComplEx model from ampligraph library
  
  triples = df.to_numpy()
  X_train, X_test = train_test_split_no_unseen(triples, test_size=10000)
  
  # use ComplEx model to build the embedding 
  model = ComplEx(batches_count=50,epochs=300,k=100,eta=20, optimizer='adam',optimizer_params={'lr':1e-4}, 
          loss='multiclass_nll',regularizer='LP', regularizer_params={'p':3, 'lambda':1e-5}, seed=0,verbose=True)
  model.fit(X_train)
  
  # Evaluate the embedding model
  filter_triples = np.concatenate((X_train, X_test))
  ranks = evaluate_performance(X_test, model=model, filter_triples=filter_triples,
                                use_default_protocol=True, verbose=True)
  mr  = mr_score(ranks)
  mrr = mrr_score(ranks)
Beispiel #22
0
from ampligraph.datasets import load_wn18
from ampligraph.latent_features import ComplEx
from ampligraph.evaluation import evaluate_performance, hits_at_n_score, mrr_score

# WN18: dict with 'train' / 'valid' / 'test' arrays of (s, p, o) triples.
X = load_wn18()

# Demo-sized ComplEx model (k=50, 20 epochs) — not tuned for benchmark numbers.
model = ComplEx(batches_count=10,
                seed=0,
                epochs=20,
                k=50,
                eta=2,
                loss="nll",
                optimizer="adam",
                optimizer_params={"lr": 0.01})

model.fit(X['train'])

# Raw (unbounded) scores for the first five test triples.
y_pred = model.predict(X['test'][:5, ])

from scipy.special import expit

# Logistic sigmoid squashes raw scores into (0, 1) for readability.
print(expit(y_pred))

# Unfiltered ranks for ten test triples (no filter_triples supplied).
ranks = evaluate_performance(X['test'][:10], model=model)
print(ranks)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

import matplotlib.pyplot as plt
"""---
# 3. Training ComplEx model
"""

import tensorflow
print(tensorflow.__version__)

from ampligraph.latent_features import ComplEx

model = ComplEx(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

positives_filter = X

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model.fit(data['train'], early_stopping = False)

"""---
# 4.  Saving and restoring a model
    ])
    X['valid'] = np.array([
        list((tok2id[r[0]], r[1], tok2id[r[2]])) for r in X['valid']
        if r[0] in known_entities and r[2] in known_entities
    ])
    X['test'] = np.array([
        list((tok2id[r[0]], r[1], tok2id[r[2]])) for r in X['test']
        if r[0] in known_entities and r[2] in known_entities
    ])
    X_train, X_valid = X['train'], X['valid']
    print('Train set size: ', X_train.shape)
    print('Test set size: ', X_valid.shape)
    ke_kwargs = {"verbose": True, "k": 70, "epochs": 100}

    # ComplEx brings double dimensions because of the twofold nature of complex numbers
    model = ComplEx(**ke_kwargs)
    print("Training...")
    model.fit(X_train)
    save_model(model, model_name_path=ke_model_path)
    # If we don't transpose the multidimensionality of the embeddings to 3D but take just 3-D-embeddings,
    # This can't be with ComplEX because, it will be an even number and 3 is not
    ke_kwargs['k'] = 3
    model2 = TransE(**ke_kwargs)
    model2.fit(X_train)
    save_model(model2, model_name_path=ke_model_path + '2')
else:
    model = restore_model(model_name_path=ke_model_path)
    model2 = restore_model(model_name_path=ke_model_path + '2')
    with open(ke_wnkeys_path, 'rb') as handle:
        tok2id, id2tok = pickle.load(handle)
 print("Shape of test_X: %s;  Shape of test_y: %s" % (test_X.shape, test_y.shape))
 print("Shape of positives_filter: " + str(positives_filter.shape))
     
 for j in range(len(mdl)):
     # Fit & Train model via ampliGraph library
     log_key = mdl[j]+": "+graph_data[i]
     log_file = open("eval_log.txt", "a")
     print("\n\n----"+log_key+"----", file=log_file)
     print("------------------------------------------------")
     print("%d) Implementation Model: %s" % (1, mdl[j]))
     print("------------------------------------------------")
     start_time = time.time()  # START: Training Time Tracker    
     K.clear_session()  # Kills current TF comp-graph & creates a new one
     
     if (mdl[j] == "ComplEx"):
         model = ComplEx(verbose=True)
     elif (mdl[j] == "ConvKB"):
         model = ConvKB(verbose=True)
     elif (mdl[j] == "DistMult"):
         model = DistMult(verbose=True)
     elif (mdl[j] == "HolE"):
         model = HolE(verbose=True)
     elif (mdl[j] == "TransE"):
         model = TransE(verbose=True)
     elif (mdl[j] == "RandomBaseline"):
         model = RandomBaseline(verbose=True)
     tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # TensorFlow will tell you all messages that have the label ERROR
     model.fit(train_X)
     
     # Save model at its best-performance point
     save_model(model, 'best_ampliGraph_model.pkl')
    num_epochs = 300

    X = load_from_rdf('.', graph_file, rdf_format='nt', data_home='.')
    test_size = int(0.05 * X.shape[0])
    X_train, X_test = train_test_split_no_unseen(X, test_size=test_size)

    print('Train set size: ', X_train.shape)
    print('Test set size: ', X_test.shape)

    model = ComplEx(batches_count=100,
                    seed=0,
                    epochs=num_epochs,
                    k=embedding_dims,
                    eta=5,
                    optimizer='adam',
                    optimizer_params={'lr': 1e-3},
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={
                        'p': 3,
                        'lambda': 1e-5
                    },
                    verbose=True)

    positives_filter = X
    tf.logging.set_verbosity(tf.logging.ERROR)

    print("Model training started...")
    model.fit(X_train, early_stopping=False)

    print("Save the model...")
    save_model(model, model_name_path=out_embeddings_file)
X_train_valid, X_test = train_test_split_no_unseen(x, test_size=3000)
X_train, X_valid = train_test_split_no_unseen(X_train_valid, test_size=3000)
# X_train, X_test = train_test_split_no_unseen(x, test_size=3000)

print('Train set size: ', X_train.shape)
print('Test set size: ', X_test.shape)
print('Validation set size: ', X_valid.shape)

from ampligraph.latent_features import ComplEx

model = ComplEx(batches_count=100, 
                seed=0, 
                epochs=300, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

positives_filter = x

# tf.logging.set_verbosity(tf.logging.ERROR)
# # model.fit(X_train, ,early_stopping = True)
# model.fit(X_train, early_stopping = True,early_stopping_params = \
#                   {
#                       'x_valid': X_valid,       # validation set
#                       'criteria':'hits1',         # Uses hits10 criteria for early stopping
#                       'burn_in': 100,              # early stopping kicks in after 100 epochs
Beispiel #28
0
import os
from ampligraph.utils import save_model, restore_model

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from ampligraph.evaluation import evaluate_performance

# Train once and cache the model on disk; subsequent runs just restore it.
ke_model_path = "./models/football_ke.amplimodel"
if not os.path.isfile(ke_model_path):
    # NOTE(review): epochs=1 and k=1 are throwaway/smoke-test settings —
    # confirm they are intentional and not leftovers from debugging.
    model = ComplEx(batches_count=50,
                    epochs=1,
                    k=1,
                    eta=20,
                    optimizer='adam',
                    optimizer_params={'lr': 1e-3},
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={
                        'p': 3,
                        'lambda': 1e-5
                    },
                    seed=0,
                    verbose=True)

    print("Training...")
    # NOTE(review): X_train / X_valid are not defined in this excerpt; they
    # must be created earlier in the full script.
    model.fit(X_train)
    save_model(model, model_name_path=ke_model_path)

    filter_triples = np.concatenate((X_train, X_valid))
else:
    model = restore_model(model_name_path=ke_model_path)
Beispiel #29
0
def test_query_topn():
    """query_topn validates its arguments and returns sorted, constrained results."""

    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'x', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e'],
                  ['a', 'z', 'f'],
                  ['c', 'z', 'f'],
                  ['b', 'z', 'f'],
                  ])

    model = ComplEx(k=2, batches_count=2)

    with pytest.raises(ValueError):  # Model not fitted
        query_topn(model, top_n=2)

    model.fit(X)

    # Each kwargs dict below is an invalid query: wrong number of anchors,
    # unknown entities/relations, or malformed *_to_consider arguments.
    invalid_queries = [
        {},
        {'head': 'a'},
        {'relation': 'y'},
        {'tail': 'e'},
        {'head': 'a', 'relation': 'y', 'tail': 'e'},
        {'head': 'xx', 'relation': 'y'},
        {'head': 'a', 'relation': 'yakkety'},
        {'head': 'a', 'tail': 'sax'},
        {'head': 'a', 'relation': 'x', 'rels_to_consider': ['y', 'z']},
        {'head': 'a', 'tail': 'f', 'rels_to_consider': ['y', 'z', 'error']},
        {'head': 'a', 'tail': 'e', 'rels_to_consider': 'y'},
        {'head': 'a', 'relation': 'x', 'ents_to_consider': ['zz', 'top']},
        {'head': 'a', 'tail': 'e', 'ents_to_consider': ['a', 'b']},
    ]
    for kwargs in invalid_queries:
        with pytest.raises(ValueError):
            query_topn(model, top_n=2, **kwargs)

    subj, pred, obj, top_n = 'a', 'x', 'e', 3

    # Querying (head, relation, ?) fixes the first two columns.
    Y, S = query_topn(model, top_n=top_n, head=subj, relation=pred)
    assert len(Y) == len(S) == top_n
    assert np.all(Y[:, 0] == subj)
    assert np.all(Y[:, 1] == pred)

    # Querying (?, relation, tail) fixes the last two columns.
    Y, S = query_topn(model, top_n=top_n, relation=pred, tail=obj)
    assert np.all(Y[:, 1] == pred)
    assert np.all(Y[:, 2] == obj)

    # Candidate heads are restricted to the requested entity set.
    ents_to_con = ['a', 'b', 'c', 'd']
    Y, S = query_topn(model, top_n=top_n, relation=pred, tail=obj,
                      ents_to_consider=ents_to_con)
    assert all(head in ents_to_con for head in Y[:, 0])

    # Candidate relations are restricted to the requested relation set.
    rels_to_con = ['y', 'x']
    Y, S = query_topn(model, top_n=10, head=subj, tail=obj,
                      rels_to_consider=rels_to_con)
    assert all(rel in rels_to_con for rel in Y[:, 1])

    # Scores come back sorted best-first.
    Y, S = query_topn(model, top_n=10, relation=pred, tail=obj)
    assert all(S[i] >= S[i + 1] for i in range(len(S) - 1))
def main():
    """Train a ComplEx embedding model on FB15k-237 and report test metrics.

    Loads the dataset, fits a ComplEx model with early stopping on the
    validation split, then evaluates filtered ranks on the test split and
    prints MRR and Hits@10.
    """
    # Load the FB15k-237 dataset (Wordnet18 alternative kept for reference):
    # X = load_wn18()
    X = load_fb15k_237()

    # Initialize a ComplEx neural embedding model with pairwise loss function.
    # The model will be trained for up to 30 epochs (early stopping may end
    # training sooner).
    model = ComplEx(
        batches_count=10,
        seed=0,
        epochs=30,
        k=150,
        eta=10,
        # Use adam optimizer with learning rate 1e-3
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        # Use pairwise loss with margin 0.5
        loss='pairwise',
        loss_params={'margin': 0.5},
        # Use L2 regularizer with regularizer weight 1e-5
        regularizer='LP',
        regularizer_params={
            'p': 2,
            'lambda': 1e-5
        },
        # Enable stdout messages (set to false if you don't want to display)
        verbose=True)

    # For evaluation, we use a filter which filters out positive statements
    # accidentally created by the corruption procedure. The filter set is the
    # concatenation of all known positives.
    # NOTE: named positives_filter (not "filter") to avoid shadowing the builtin.
    positives_filter = np.concatenate((X['train'], X['valid'], X['test']))

    # Fit the model on the training set, early-stopping on the validation set.
    model.fit(X['train'],
              early_stopping=True,
              early_stopping_params={
                  'x_valid': X['valid'],  # validation set
                  'criteria': 'hits10',  # Uses hits10 criteria for early stopping
                  'burn_in': 100,  # early stopping kicks in after 100 epochs
                  'check_interval': 20,  # validates every 20th epoch
                  'stop_interval': 5,  # stops if 5 successive validation checks are bad.
                  'x_filter': positives_filter,  # Use filter for filtering out positives
                  'corruption_entities': 'all',  # corrupt using all entities
                  'corrupt_side': 's+o'  # corrupt subject and object (but not at once)
              })

    # Run the evaluation procedure on the test set (with filtering).
    # To disable filtering: filter_triples=None
    # Usually, we corrupt subject and object sides separately and compute ranks.
    ranks = evaluate_performance(
        X['test'],
        model=model,
        filter_triples=positives_filter,
        use_default_protocol=True,  # corrupt subj and obj separately while evaluating
        verbose=True)

    # Compute and print metrics:
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("MRR: %f, Hits@10: %f" % (mrr, hits_10))