Beispiel #1
0
def test_evaluate_performance_too_many_entities_warning():
    """Exercise the "too many entities" warning of ``evaluate_performance``.

    A one-epoch TransE model is fitted on the YAGO3-10 training split and
    evaluated on every 100th test triple, corrupting the object side only.
    A ``UserWarning`` is required when no ``entities_subset`` is given and
    when a 50,000-entity subset is given. The two remaining calls (a
    10-entity subset, and WN18RR without a subset) only have to complete;
    the absence of a warning is not asserted for them.
    """
    # The same hyper-parameters are used for both models trained below.
    transe_settings = dict(batches_count=200,
                           seed=0,
                           epochs=1,
                           k=5,
                           eta=1,
                           verbose=True)

    yago = load_yago3_10()
    yago_model = TransE(**transe_settings)
    yago_model.fit(yago['train'])
    yago_test_sample = yago['test'][::100]

    # Case 1: no entity list declared.
    with pytest.warns(UserWarning):
        evaluate_performance(yago_test_sample, yago_model, verbose=True, corrupt_side='o')

    # Case 2: entity list as large as the threshold.
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. The literal is used on purpose so that the
        # constant does not have to be exported just for this test (keeps linting happy).
        train_triples = yago["train"]
        entities_subset = np.union1d(train_triples[:, 0], train_triples[:, 2])[:50000]
        evaluate_performance(yago_test_sample,
                             yago_model,
                             verbose=True,
                             corrupt_side='o',
                             entities_subset=entities_subset)

    # Case 3: small entity list (the call is only expected to succeed).
    evaluate_performance(yago_test_sample,
                         yago_model,
                         verbose=True,
                         corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # Case 4: smaller dataset, no entity list declared (the call is only expected to succeed).
    wn18rr = load_wn18rr()
    wn18rr_model = TransE(**transe_settings)
    wn18rr_model.fit(wn18rr['train'])
    evaluate_performance(wn18rr['test'][::100], wn18rr_model, verbose=True, corrupt_side='o')
Beispiel #2
0
def test_select_best_model_ranking_random():
    """Run ``select_best_model_ranking`` with a callable learning rate and ``max_combinations=10``.

    The grid offers two values for ``k`` and supplies ``lr`` as a zero-argument
    callable that draws a new value each time it is invoked (presumably once
    per sampled combination -- confirm against the search implementation).
    The assertions check the selected parameters, that the history holds ten
    pairwise-distinct parameter sets, and that the test metrics are complete,
    non-negative and not NaN.
    """
    wn18rr = load_wn18rr()

    def draw_learning_rate():
        return np.log(np.random.uniform(1.00001, 1.1))

    search_space = {
        "batches_count": [50],
        "seed": 0,
        "epochs": [1],
        "k": [2, 50],
        "eta": [1],
        "loss": ["nll"],
        "loss_params": {},
        "embedding_model_params": {},
        "regularizer": [None],
        "regularizer_params": {},
        "optimizer": ["adagrad"],
        "optimizer_params": {"lr": draw_learning_rate},
    }

    outcome = select_best_model_ranking(TransE,
                                        wn18rr['train'],
                                        wn18rr['valid'][::5],
                                        wn18rr['test'][::10],
                                        search_space,
                                        max_combinations=10)
    best_model, best_params, best_mrr_train, ranks_test, test_results, experimental_history = outcome

    # Accepted interval for any learning rate reported by the search.
    lr_floor = np.log(1.00001)
    lr_ceiling = np.log(100)

    # The winning configuration must come from the declared search space.
    assert best_params['k'] in (2, 50)
    assert lr_floor <= best_params['optimizer_params']['lr'] <= lr_ceiling

    # Every one of the ten runs is recorded, and both values of k were tried.
    assert len(experimental_history) == 10
    tried_params = [run["model_params"] for run in experimental_history]
    assert {params["k"] for params in tried_params} == {2, 50}
    assert all(lr_floor <= params["optimizer_params"]["lr"] <= lr_ceiling for params in tried_params)

    # No parameter combination is evaluated twice.
    distinct_runs = {frozenset(_flatten_nested_keys(params).items()) for params in tried_params}
    assert len(distinct_runs) == 10

    # The test metrics are complete and well-formed.
    assert set(test_results.keys()) == {"mrr", "mr", "hits_1", "hits_3", "hits_10"}
    assert all(score >= 0 for score in test_results.values())
    assert not any(np.isnan(score) for score in test_results.values())
Beispiel #3
0
def test_select_best_model_ranking_grid():
    """Run ``select_best_model_ranking`` over a 2 x 2 grid of ``k`` and learning rate.

    No ``max_combinations`` is passed. The assertions require four distinct
    recorded runs covering both values of ``k`` and both learning rates, the
    learning rate 0.0001 to be the selected one, and the test metrics to be
    complete, non-negative and not NaN.
    """
    wn18rr = load_wn18rr()
    search_space = {
        "batches_count": [50],
        "seed": 0,
        "epochs": [1],
        "k": [2, 50],
        "eta": [1],
        "loss": ["nll"],
        "loss_params": {},
        "embedding_model_params": {},
        "regularizer": [None],
        "regularizer_params": {},
        "optimizer": ["adagrad"],
        "optimizer_params": {"lr": [1000.0, 0.0001]},
    }

    outcome = select_best_model_ranking(TransE,
                                        wn18rr['train'],
                                        wn18rr['valid'][::5],
                                        wn18rr['test'][::10],
                                        search_space)
    best_model, best_params, best_mrr_train, ranks_test, test_results, experimental_history = outcome

    # Selected configuration.
    assert best_params['k'] in (2, 50)
    assert best_params['optimizer_params']['lr'] == 0.0001

    # All four grid points are recorded, each exactly once.
    assert len(experimental_history) == 4
    tried_params = [run["model_params"] for run in experimental_history]
    assert {params["k"] for params in tried_params} == {2, 50}
    assert {params["optimizer_params"]["lr"] for params in tried_params} == {1000.0, 0.0001}
    distinct_runs = {frozenset(_flatten_nested_keys(params).items()) for params in tried_params}
    assert len(distinct_runs) == 4

    # The test metrics are complete and well-formed.
    assert set(test_results.keys()) == {"mrr", "mr", "hits_1", "hits_3", "hits_10"}
    print(test_results.values())
    assert all(score >= 0 for score in test_results.values())
    assert not any(np.isnan(score) for score in test_results.values())
Beispiel #4
0
def test_wn18rr():
    """Check the split sizes and vocabulary consistency of the WN18RR loader.

    The expected validation and test sizes are the raw sizes minus the 210
    triples per split that contain entities unseen in training. Because those
    triples are removed, the validation and test splits must not introduce any
    entity or relation that is absent from the training split.
    """
    wn18rr = load_wn18rr()

    def entities_of(split):
        # Subjects are in column 0 and objects in column 2 of each triple.
        triples = wn18rr[split]
        return np.union1d(triples[:, 0], triples[:, 2])

    def relations_of(split):
        # Predicates are in column 1 of each triple.
        return np.unique(wn18rr[split][:, 1])

    ent_train = entities_of("train")
    ent_valid = entities_of("valid")
    ent_test = entities_of("test")
    distinct_ent = np.union1d(np.union1d(ent_train, ent_valid), ent_test)

    # The relation vocabulary is gathered from all three splits (the previous
    # version unioned the training split with itself three times).
    rel_train = relations_of("train")
    rel_valid = relations_of("valid")
    rel_test = relations_of("test")
    distinct_rel = np.union1d(np.union1d(rel_train, rel_valid), rel_test)

    assert len(wn18rr['train']) == 86835

    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['valid']) == 3034 - 210

    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['test']) == 3134 - 210

    # With unseen triples removed, the overall vocabulary is exactly the
    # training vocabulary (np.union1d returns sorted unique values, so the
    # arrays can be compared element-wise).
    assert np.array_equal(distinct_ent, ent_train)
    assert np.array_equal(distinct_rel, rel_train)

    # WN18RR is documented as having 11 relation types.
    assert len(distinct_rel) == 11
Beispiel #5
0
 def perform_test():
     """Fit a one-epoch TransE model on WN18RR and check the trained parameter shapes.

     The first trained parameter array must have one row per distinct entity
     of the training split, the second one row per distinct relation, and
     both must have ``k`` columns.
     """
     train_triples = load_wn18rr()['train']
     embedding_size = 5

     # Vocabulary sizes the trained parameter arrays are compared against.
     n_entities = len(np.union1d(train_triples[:, 0], train_triples[:, 2]))
     n_relations = len(np.unique(train_triples[:, 1]))

     model = TransE(batches_count=100,
                    seed=555,
                    epochs=1,
                    k=embedding_size,
                    loss='multiclass_nll',
                    loss_params={'margin': 5},
                    verbose=True,
                    optimizer='sgd',
                    optimizer_params={'lr': 0.001})
     model.fit(train_triples)

     entity_params = model.trained_model_params[0]
     relation_params = model.trained_model_params[1]

     # Row counts: one row per entity / relation.
     assert entity_params.shape[0] == n_entities
     assert relation_params.shape[0] == n_relations
     # Column counts: the embedding size k.
     assert entity_params.shape[1] == embedding_size
     assert relation_params.shape[1] == embedding_size