Example #1
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import evaluate_performance, hits_at_n_score
from ampligraph.latent_features import RandomBaseline


def test_evaluate_RandomBaseline():
    model = RandomBaseline(seed=0)
    X = load_wn18()
    model.fit(X["train"])
    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 corrupt_side='s+o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert ranks.shape == (len(X['test']), )
    assert hits10 < 0.01 and hits1 == 0.0

    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 corrupt_side='s,o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert ranks.shape == (len(X['test']), 2)
    assert hits10 < 0.01 and hits1 == 0.0

    ranks_filtered = evaluate_performance(X["test"],
                                          filter_triples=np.concatenate(
                                              (X['train'], X['valid'],
                                               X['test'])),
                                          model=model,
                                          corrupt_side='s,o',
                                          verbose=False)
    hits10 = hits_at_n_score(ranks_filtered, n=10)
    hits1 = hits_at_n_score(ranks_filtered, n=1)
    assert ranks_filtered.shape == (len(X['test']), 2)
    assert hits10 < 0.01 and hits1 == 0.0
    assert np.all(ranks_filtered <= ranks)
    assert np.any(ranks_filtered != ranks)
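With corrupt_side='s,o' the evaluation returns one subject-side and one object-side rank per test triple, hence the (n, 2) shape asserted above. A minimal sketch of how such a rank matrix relates to a single aggregate metric (AmpliGraph's metric functions also accept the two-column form directly):

import numpy as np
from ampligraph.evaluation import mrr_score

# Toy rank matrix: one row per test triple, columns = (subject rank, object rank).
ranks_2col = np.array([[1, 3],
                       [2, 2],
                       [5, 1]])

# Flattening treats each side-specific rank as its own observation.
assert np.isclose(mrr_score(ranks_2col.reshape(-1)), np.mean(1.0 / ranks_2col))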
Example #2
def test_evaluate_performance_ranking_against_shuffled_all_entities():
    """ Compares mrr of test set by using default protocol against all entities vs 
        mrr of corruptions generated by corrupting using entities_subset = all entities shuffled
    """
    import random
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=1,
                    k=20,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    entities_subset = list(model.ent_to_idx.keys())
    random.shuffle(entities_subset)  # shuffle in place: random.shuffle returns None

    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks_all = evaluate_performance(X['test'][::1000],
                                     model,
                                     X_filter,
                                     verbose=True,
                                     corrupt_side='s,o')

    ranks_shuffled_ent = evaluate_performance(X['test'][::1000],
                                              model,
                                              X_filter,
                                              verbose=True,
                                              corrupt_side='s,o',
                                              entities_subset=entities_subset)
    assert mrr_score(ranks_all) == mrr_score(ranks_shuffled_ent)
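The equality asserted above holds because a rank only counts how many corruption scores beat the true triple's score, which is invariant to the order of the candidate set. A small standalone sketch of that invariance (plain NumPy, not AmpliGraph):

import numpy as np

rng = np.random.default_rng(0)
corruption_scores = rng.normal(size=1000)
true_score = 0.0

# Rank = 1 + number of corruptions scoring above the true triple.
rank = 1 + np.sum(corruption_scores > true_score)
rank_shuffled = 1 + np.sum(rng.permutation(corruption_scores) > true_score)
assert rank == rank_shuffled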
Example #3
def test_evaluate_performance_default_protocol_with_filter():
    wn18 = load_wn18()

    X_filter = np.concatenate((wn18['train'], wn18['valid'], wn18['test']))


    model = TransE(batches_count=10, seed=0, epochs=1,
                   k=50, eta=10, verbose=True,
                   embedding_model_params={'normalize_ent_emb': False, 'norm': 1},
                   loss='self_adversarial', loss_params={'margin': 1, 'alpha': 0.5},
                   optimizer='adam',
                   optimizer_params={'lr': 0.0005})

    model.fit(wn18['train'])


    from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                       mrr_score, mr_score)

    ranks_sep = []
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter, verbose=True, corrupt_side='o',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)

    ranks = evaluate_performance(wn18['test'][::100], model, X_filter, verbose=True, corrupt_side='s',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)
    print('----------EVAL WITH FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MR:', mr_sep)
    print('MRR:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))


    ranks = evaluate_performance(wn18['test'][::100], model, X_filter, verbose=True, corrupt_side='s+o',
                                 use_default_protocol=True)
    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MR:', mr_joint)
    print('MRR:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))
    
    np.testing.assert_equal(mr_sep, mr_joint)
    assert np.isfinite(mrr_joint)
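The final assertion holds because the default protocol evaluates subject-side and object-side corruptions separately and reports both rank sets, so the mean rank over the joint result equals the mean over the two separately collected lists. A small numeric sketch of that identity:

import numpy as np

subj_ranks = np.array([1, 4, 2])
obj_ranks = np.array([3, 1, 6])

mr_separate = np.mean(np.concatenate((subj_ranks, obj_ranks)))
mr_joint = np.mean(np.stack((subj_ranks, obj_ranks), axis=1))  # shape (n, 2)
assert mr_separate == mr_joint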
Example #4
def test_evaluate_performance_so_side_corruptions_without_filter():
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=5,
                    k=200,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][::20],
                                 model,
                                 X_filter,
                                 verbose=True,
                                 use_default_protocol=False,
                                 corrupt_side='s+o')
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
    assert np.isfinite(mrr)
Example #5
def test_evaluate_with_ent_subset_large_graph():
    set_entity_threshold(1)
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=2, k=10, eta=1,
                optimizer='sgd', optimizer_params={'lr': 1e-5},
                loss='pairwise', loss_params={'margin': 0.5},
                regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                verbose=True)

    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    all_nodes = set(X_filter[:, 0]).union(X_filter[:, 2])
    
    entities_subset = np.random.choice(list(all_nodes), 100, replace=False)
    
    ranks = evaluate_performance(X['test'][::10],
                                 model=model,
                                 filter_triples=X_filter,
                                 corrupt_side='o',
                                 use_default_protocol=False,
                                 entities_subset=list(entities_subset),
                                 verbose=True)
    assert np.sum(ranks > (100 + 1)) == 0, \
        "no rank should exceed 101 (100 candidate corruptions + the true triple)"
    reset_entity_threshold()
Example #6
def test_evaluate_performance_TransE():
    X = load_wn18()
    model = TransE(batches_count=10,
                   seed=0,
                   epochs=100,
                   k=100,
                   eta=5,
                   optimizer_params={'lr': 0.1},
                   loss='pairwise',
                   loss_params={'margin': 5},
                   optimizer='adagrad')
    model.fit(np.concatenate((X['train'], X['valid'])))

    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200],
                                 model=model,
                                 filter_triples=filter_triples,
                                 verbose=True)

    # ranks = evaluate_performance(X['test'][:200], model=model)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Example #7
def test_evaluate_performance_ranking_against_specified_entities():
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=1,
                    k=20,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    entities_subset = np.concatenate(
        [X['test'][::1000, 0], X['test'][::1000, 2]], 0)

    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks = evaluate_performance(X['test'][::1000],
                                 model,
                                 X_filter,
                                 verbose=True,
                                 corrupt_side='s+o',
                                 use_default_protocol=True,
                                 entities_subset=entities_subset)
    ranks = ranks.reshape(-1)
    assert (np.sum(ranks > len(entities_subset)) == 0)
Example #8
def test_evaluate_performance_filter_without_xtest():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10, loss='nll',
                    regularizer=None, optimizer='adam', optimizer_params={'lr': 0.01}, verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'])) # filter does not contain X_test
    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score
    ranks = evaluate_performance(X['test'][::1000], model, X_filter, verbose=True, corrupt_side='s,o')
    assert mrr_score(ranks) > 0
Example #9
def kge(triples, kge_name, verbose):
    # Train test split
    t_size = math.ceil(len(triples)*0.2)
    X_train, X_test = train_test_split_no_unseen(triples, test_size=t_size)

    # Select kge_name
    if kge_name == 'complex':
        # ComplEx model
        model = ComplEx(batches_count=50,
                        epochs=300,
                        k=100,
                        eta=20,
                        optimizer='adam',
                        optimizer_params={'lr':1e-4},
                        loss='multiclass_nll',
                        regularizer='LP',
                        regularizer_params={'p':3, 'lambda':1e-5},
                        seed=0,
                        verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    model.fit(X_train)

    # Embedding evaluation
    if verbose:
        filter_triples = np.concatenate((X_train, X_test))
        ranks = evaluate_performance(X_test,
                                     model=model,
                                     filter_triples=filter_triples,
                                     use_default_protocol=True,
                                     verbose=True)

        mrr = mrr_score(ranks)
        print("MRR: %.2f" % (mrr))
        mr = mr_score(ranks)
        print("MR: %.2f" % (mr))
        hits_10 = hits_at_n_score(ranks, n=10)
        print("Hits@10: %.2f" % (hits_10))
        hits_3 = hits_at_n_score(ranks, n=3)
        print("Hits@3: %.2f" % (hits_3))
        hits_1 = hits_at_n_score(ranks, n=1)
        print("Hits@1: %.2f" % (hits_1))

        print('''
        - Ampligraph example -
        MRR: 0.25
        MR: 4927.33
        Hits@10: 0.35
        Hits@3: 0.28
        Hits@1: 0.19
        ''')

    return model
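A hypothetical usage sketch for the kge() helper above; the toy triples array is invented for illustration, and the function's own imports (numpy, math, sys, ComplEx, train_test_split_no_unseen, the metric helpers) are assumed to be in scope:

import numpy as np

# Invented toy graph, just to exercise the signature; any (n, 3) array of
# subject/predicate/object strings works.
triples = np.array([['alice', 'knows', 'bob'],
                    ['bob', 'knows', 'carol'],
                    ['carol', 'knows', 'alice'],
                    ['alice', 'likes', 'carol'],
                    ['bob', 'likes', 'alice']])

model = kge(triples, 'complex', verbose=False)
scores = model.predict(triples[:2])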
Example #10
def test_evaluate_RandomBaseline():
    model = RandomBaseline(seed=0)
    X = load_wn18()
    model.fit(X["train"])
    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 use_default_protocol=False,
                                 corrupt_side='s+o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert (hits10 == 0.0002 and hits1 == 0.0)
Example #11
def kge(triples, kge_name, epochs, batch_size, learning_rate, seed, verbose):
    kge_model_savepath = './temp/ampligraph.model'

    if not os.path.isfile(kge_model_savepath):
        # Embedding evaluation
        if verbose:
            # Train test split
            t_size = math.ceil(len(triples) * 0.2)
            X_train, X_test = train_test_split_no_unseen(triples,
                                                         test_size=t_size)

            eval_model = select_kge(kge_name, batch_size, epochs, seed,
                                    verbose)

            eval_model.fit(X_train)
            filter_triples = np.concatenate((X_train, X_test))
            ranks = evaluate_performance(X_test,
                                         model=eval_model,
                                         filter_triples=filter_triples,
                                         use_default_protocol=True,
                                         verbose=True)

            mrr = mrr_score(ranks)
            print("MRR: %.2f" % (mrr))
            mr = mr_score(ranks)
            print("MR: %.2f" % (mr))
            hits_10 = hits_at_n_score(ranks, n=10)
            print("Hits@10: %.2f" % (hits_10))
            hits_3 = hits_at_n_score(ranks, n=3)
            print("Hits@3: %.2f" % (hits_3))
            hits_1 = hits_at_n_score(ranks, n=1)
            print("Hits@1: %.2f" % (hits_1))

            print('''
            - Ampligraph example -
            MRR: 0.25
            MR: 4927.33
            Hits@10: 0.35
            Hits@3: 0.28
            Hits@1: 0.19
            ''')

        model = select_kge(kge_name, batch_size, epochs, seed, verbose)

        print('Training...')
        model.fit(np.array(triples))
        save_model(model, model_name_path=kge_model_savepath)
    else:
        model = restore_model(model_name_path=kge_model_savepath)

    return model
Example #12
def test_evaluate_performance_too_many_entities_warning():
    X = load_yago3_10()
    model = TransE(batches_count=200,
                   seed=0,
                   epochs=1,
                   k=5,
                   eta=1,
                   verbose=True)
    model.fit(X['train'])

    # no entity list declared
    with pytest.warns(UserWarning):
        evaluate_performance(X['test'][::100],
                             model,
                             verbose=True,
                             corrupt_side='o')

    # with larger than threshold entity list
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit value to comply with linting
        # and thus avoiding exporting unused global variable.
        entities_subset = np.union1d(np.unique(X["train"][:, 0]),
                                     np.unique(X["train"][:, 2]))[:50000]
        evaluate_performance(X['test'][::100],
                             model,
                             verbose=True,
                             corrupt_side='o',
                             entities_subset=entities_subset)

    # with small entity list (no exception expected)
    evaluate_performance(X['test'][::100],
                         model,
                         verbose=True,
                         corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # with smaller dataset, no entity list declared (no exception expected)
    X_wn18rr = load_wn18rr()
    model_wn18 = TransE(batches_count=200,
                        seed=0,
                        epochs=1,
                        k=5,
                        eta=1,
                        verbose=True)
    model_wn18.fit(X_wn18rr['train'])
    evaluate_performance(X_wn18rr['test'][::100],
                         model_wn18,
                         verbose=True,
                         corrupt_side='o')
Example #13
def compute_metrics(model, train_samples, test_samples):
    ranks = evaluate_performance(
        test_samples,
        model=model,
        filter_triples=train_samples,   # corruption strategy filter
        use_default_protocol=True,      # corrupt subject and object separately
        verbose=True)
    return {
        'MRR': mrr_score(ranks),
        'Hits@10': hits_at_n_score(ranks, n=10),
        'Hits@3': hits_at_n_score(ranks, n=3),
        'Hits@1': hits_at_n_score(ranks, n=1)
    }
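A hypothetical call sketch for compute_metrics(), assuming a fitted AmpliGraph 1.x model and the usual (n, 3) triple arrays (the names X_train and X_test are placeholders):

# `model` is any fitted AmpliGraph model; X_train / X_test are (n, 3) triple arrays.
metrics = compute_metrics(model, X_train, X_test)
print("MRR: %.2f, Hits@1: %.2f" % (metrics['MRR'], metrics['Hits@1']))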
Example #14
def test_evaluate_performance_nll_complex():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=10, k=150, optimizer_params={'lr': 0.1}, eta=10, loss='nll',
                    optimizer='adagrad', verbose=True)
    model.fit(np.concatenate((X['train'], X['valid'])))

    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200], model=model, filter_triples=filter_triples, verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Example #15
def test_large_graph_mode():
    set_entity_threshold(10)
    X = load_wn18()
    model = ComplEx(batches_count=100,
                    seed=555,
                    epochs=1,
                    k=50,
                    loss='multiclass_nll',
                    loss_params={'margin': 5},
                    verbose=True,
                    optimizer='sgd',
                    optimizer_params={'lr': 0.001})
    model.fit(X['train'])
    X_filter = np.concatenate((X['train'], X['valid'], X['test']), axis=0)
    evaluate_performance(X['test'][::1000],
                         model,
                         X_filter,
                         verbose=True,
                         corrupt_side='s,o')

    y = model.predict(X['test'][:1])
    print(y)
    reset_entity_threshold()
Example #16
def score_samples(model, evaluation_samples, train_samples):
    # Deduplicate the filter set: unique triples from train + evaluation.
    skipped_samples = np.array(
        list({tuple(i) for i in np.vstack((train_samples, evaluation_samples))}))
    ranks = evaluate_performance(
        evaluation_samples,
        model=model,
        filter_triples=skipped_samples,   # corruption strategy filter
        corrupt_side='s+o',               # corrupt both sides, ranked jointly
        use_default_protocol=False,
        verbose=True)
    return model.predict(evaluation_samples), ranks
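The set-of-tuples construction above is a deduplication idiom: NumPy rows are not hashable, so each triple is turned into a tuple before building the set. A standalone sketch; np.unique(..., axis=0) is an equivalent (sorted, deterministic) alternative:

import numpy as np

a = np.array([['a', 'r', 'b'],
              ['a', 'r', 'b'],
              ['b', 'r', 'c']])
dedup_set = np.array(list({tuple(row) for row in a}))  # row order not guaranteed
dedup_np = np.unique(a, axis=0)                        # sorted rows, deterministic
assert len(dedup_set) == len(dedup_np) == 2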
Example #17
def evaluate_predictions(X_unseen, positives_filter, model):
    # Deduplicate the filter set: unique triples from the filter + unseen triples.
    unseen_filter = np.array(
        list({tuple(i) for i in np.vstack((positives_filter, X_unseen))}))

    ranks_unseen = evaluate_performance(
        X_unseen,
        model=model,
        filter_triples=unseen_filter,   # corruption strategy filter
        corrupt_side='s+o',             # corrupt both sides, ranked jointly
        use_default_protocol=False,
        verbose=True)

    scores = model.predict(X_unseen)
    return scores
Example #18
def evaluate_model(X_test, model, positives_filter):
    ranks = evaluate_performance(
        X_test,
        model=model,
        filter_triples=positives_filter,   # corruption strategy filter
        use_default_protocol=True,         # corrupt subject and object separately
        verbose=True)

    mrr = mrr_score(ranks)
    print("MRR: %.2f" % (mrr))

    hits_10 = hits_at_n_score(ranks, n=10)
    print("Hits@10: %.2f" % (hits_10))
    hits_3 = hits_at_n_score(ranks, n=3)
    print("Hits@3: %.2f" % (hits_3))
    hits_1 = hits_at_n_score(ranks, n=1)
    print("Hits@1: %.2f" % (hits_1))
Example #19
from ampligraph.latent_features import save_model, restore_model
# save_model(model, './best_model.pkl')
model = restore_model('./best_model.pkl')
if model.is_fitted:
    print('The model is fit!')
else:
    print('The model is not fit! Did you skip a step?')

from ampligraph.evaluation import evaluate_performance
ranks = evaluate_performance(X_test, 
                             model=model, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))

hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))
hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % (hits_1))
Example #20
if model.is_fitted:
    print('The model is fit!')
else:
    print('The model is not fit! Did you skip a step?')

"""---
# 5. Evaluating a model
"""

from ampligraph.evaluation import evaluate_performance

"""## Running evaluation"""

ranks = evaluate_performance(data['test'], 
                             model=model, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

"""## Metrics"""

from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))

hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))
hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % (hits_1))
Example #21
# Save model at its best-performance point
save_model(model, 'best_ampliGraph_model.pkl')
del model  # delete the older model
# Load the recently saved best-performance model
model = restore_model('./best_ampliGraph_model.pkl')
if model.is_fitted:
    print('The model is fit!')
else:
    print('The model is not fit! Did you skip a step?')

# TRAINING: evaluate the model's performance
test_X = filter_unseen_entities(test_X, model, verbose=True, strict=False)
test_y = test_X[:, 1]
scores_validtn = evaluate_performance(test_X,
                                      model=model,
                                      filter_triples=positives_filter,  # corruption strategy filter defined above
                                      use_default_protocol=True,  # corrupt subj and obj separately while evaluating
                                      strict=False,
                                      verbose=True)

end_time = time.time()  # STOP: training time tracker
print("\nTraining Time:", end_time - start_time, "seconds")  # PRINT: training time tracker
print("Training Time:", end_time - start_time, "seconds", file=log_file)

pred_y_res = model.predict(test_X)
pred_y_proba = expit(pred_y_res)

# Evaluate results via ML standards
ground_truth = test_y  # already NumPy and 'int32'
predictions = np.rint(pred_y_proba).astype(np.int32)
predictions_proba = np.round(pred_y_proba, decimals=2).astype(np.float32)
ground_truth, predictions = to_categorical(ground_truth, dtype=np.int32), to_categorical(predictions, dtype=np.int32)
Example #22
def run_single_exp(config, dataset, model):
    hyperparams = config["hyperparams"][dataset][model]
    if hyperparams is None:
        print("dataset {0}... model {1} experiment is not conducted yet..."
              .format(dataset, config["model_name_map"][model]))
        return {"hyperparams": ".??"}
    print("dataset {0}... model {1}... hyperparameters: {2}"
          .format(dataset, config["model_name_map"][model], hyperparams))

    es_code = "{0}_{1}".format(dataset, model)

    load_func = getattr(ampligraph.datasets,
                        config["load_function_map"][dataset])
    X = load_func()
    # logging.debug("Loaded...{0}...".format(dataset))

    # load model
    model_class = getattr(ampligraph.latent_features,
                          config["model_name_map"][model])
    model = model_class(**hyperparams)
    # Fit the model on the training set.
    # The entire dataset is used to filter out false positive statements
    # created by the corruption procedure:
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))

    if es_code in config["no_early_stopping"]:
        logging.debug("Fit without early stopping...")
        model.fit(X["train"])
    else:
        logging.debug("Fit with early stopping...")
        model.fit(
            X["train"], True, {
                'x_valid': X['valid'][::10],
                'criteria': 'mrr',
                'x_filter': filter_triples,
                'stop_interval': 2,
                'burn_in': 0,
                'check_interval': 100
            })

    # Run the evaluation procedure on the test set. Will create filtered rankings.
    # To disable filtering: filter_triples=None
    ranks = evaluate_performance(X['test'], model, filter_triples, verbose=False)

    # compute and print metrics:
    mr = mr_score(ranks)
    mrr = mrr_score(ranks)
    hits_1 = hits_at_n_score(ranks, n=1)
    hits_3 = hits_at_n_score(ranks, n=3)
    hits_10 = hits_at_n_score(ranks, n=10)

    return {
        "mr": mr,
        "mrr": mrr,
        "H@1": hits_1,
        "H@3": hits_3,
        "H@10": hits_10,
        "hyperparams": hyperparams
    }
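A hypothetical config sketch showing the shape run_single_exp() expects, inferred from the keys the function reads; every concrete value below is illustrative only:

config = {
    "hyperparams": {
        "wn18": {"complex": {"batches_count": 10, "epochs": 100, "k": 150,
                             "eta": 10, "loss": "nll", "seed": 0}},
    },
    "model_name_map": {"complex": "ComplEx"},
    "load_function_map": {"wn18": "load_wn18"},
    "no_early_stopping": [],  # e.g. ["wn18_complex"] to fit without early stopping
}
results = run_single_exp(config, "wn18", "complex")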
Example #23
# Client and the SPARQL endpoint
endpoint = 'http://10.161.202.101:8890/sparql/'
port = 8890
output_format = HttpClientDataFormat.PANDAS_DF
client = HttpClient(endpoint_url=endpoint, port=port, return_format=output_format, timeout=timeout,
                    default_graph_uri=default_graph_url, max_rows=max_rows)

# Get all triples where the object is a URI
dataset = graph.feature_domain_range(s, p, o).filter({o: ['isURI']})

# Execute the query
df = dataset.execute(client, return_format=output_format)

# Train/test split, then build a ComplEx model with the AmpliGraph library
triples = df.to_numpy()
X_train, X_test = train_test_split_no_unseen(triples, test_size=10000)

# Use the ComplEx model to build the embeddings
model = ComplEx(batches_count=50, epochs=300, k=100, eta=20, optimizer='adam', optimizer_params={'lr': 1e-4},
                loss='multiclass_nll', regularizer='LP', regularizer_params={'p': 3, 'lambda': 1e-5},
                seed=0, verbose=True)
model.fit(X_train)

# Evaluate the embedding model
filter_triples = np.concatenate((X_train, X_test))
ranks = evaluate_performance(X_test, model=model, filter_triples=filter_triples,
                             use_default_protocol=True, verbose=True)
mr = mr_score(ranks)
mrr = mrr_score(ranks)
Example #24
                epochs=20,
                k=50,
                eta=2,
                loss="nll",
                optimizer="adam",
                optimizer_params={"lr": 0.01})

model.fit(X['train'])

y_pred = model.predict(X['test'][:5, ])

from scipy.special import expit

print(expit(y_pred))

ranks = evaluate_performance(X['test'][:10], model=model)
print(ranks)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

embs = model.get_embeddings(embs_labels, type='entity')
embs_2d = TSNE(n_components=2).fit_transform(embs)

fig, ax = plt.subplots()
ax.scatter(embs_2d[:, 0], embs_2d[:, 1])
for i, lab in enumerate(embs_labels):
Example #25
def main():
    # load Wordnet18 dataset:
    # X = load_wn18()
    X = load_fb15k_237()
    modify_flag = False

    # Initialize a ComplEx neural embedding model with pairwise loss function.
    # The model will be trained for 30 epochs.
    model = ComplEx(
        batches_count=10,
        seed=0,
        epochs=30,
        k=150,
        eta=10,
        # Use adam optimizer with learning rate 1e-3
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        # Use pairwise loss with margin 0.5
        loss='pairwise',
        loss_params={'margin': 0.5},
        # Use L2 regularizer with regularizer weight 1e-5
        regularizer='LP',
        regularizer_params={
            'p': 2,
            'lambda': 1e-5
        },
        # Enable stdout messages (set to false if you don't want to display)
        verbose=True)  #, modify_flag = modify_flag)

    if False:
        # ground truth params (have not tried yet)
        # k: 350; epochs: 4000; eta: 30; loss: self_adversarial; loss_params: alpha: 1; margin: 0.5; optimizer: adam; optimizer_params: lr: 0.0001; seed: 0; batches_count: 50
        model = ComplEx(
            batches_count=50,
            seed=0,
            epochs=4000,
            k=350,
            eta=30,
            # Use adam optimizer with learning rate 1e-3
            optimizer='adam',
            optimizer_params={'lr': 1e-4},
            # Use pairwise loss with margin 0.5
            loss='self_adversarial',
            loss_params={
                'margin': 0.5,
                'alpha': 1
            },
            # Use L2 regularizer with regularizer weight 1e-5
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-5
            },
            # Enable stdout messages (set to false if you don't want to display)
            verbose=True,
            modify_flag=modify_flag)

    # For evaluation, we can use a filter which would be used to filter out
    # positives statements created by the corruption procedure.
    # Here we define the filter set by concatenating all the positives
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))

    # Fit the model on training and validation set
    model.fit(X['train'],
              early_stopping=True,
              early_stopping_params= \
                  {
                      'x_valid': X['valid'],  # validation set
                      'criteria': 'hits10',  # Uses hits10 criteria for early stopping
                      'burn_in': 100,  # early stopping kicks in after 100 epochs
                      'check_interval': 20,  # validates every 20th epoch
                      'stop_interval': 5,  # stops if 5 successive validation checks are bad.
                      'x_filter': filter_triples,  # use the filter to remove positives
                      'corruption_entities': 'all',  # corrupt using all entities
                      'corrupt_side': 's+o'  # corrupt subject and object (but not at once)
                  }
              )

    # Run the evaluation procedure on the test set (with filtering).
    # To disable filtering: filter_triples=None
    # Usually, we corrupt subject and object sides separately and compute ranks

    ranks = evaluate_performance(
        X['test'],
        model=model,
        filter_triples=filter_triples,
        use_default_protocol=True,  # corrupt subj and obj separately while evaluating
        verbose=True)

    # compute and print metrics:
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("MRR: %f, Hits@10: %f" % (mrr, hits_10))