def test_evaluate_performance_too_many_entities_warning():
    """Check that evaluate_performance emits a UserWarning when the corruption
    entity set is large (above TOO_MANY_ENT_TH), and stays silent otherwise."""
    yago = load_yago3_10()
    model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    model.fit(yago['train'])
    test_subset = yago['test'][::100]

    # No entity list declared: all YAGO3-10 entities are used -> warning expected.
    with pytest.warns(UserWarning):
        evaluate_performance(test_subset, model, verbose=True, corrupt_side='o')

    # Explicit entity list larger than the threshold -> warning expected.
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit value to comply with linting
        # and thus avoiding exporting unused global variable.
        entities_subset = np.union1d(np.unique(yago["train"][:, 0]),
                                     np.unique(yago["train"][:, 2]))[:50000]
        evaluate_performance(test_subset, model, verbose=True,
                             corrupt_side='o', entities_subset=entities_subset)

    # Small entity list (no exception/warning expected).
    evaluate_performance(test_subset, model, verbose=True,
                         corrupt_side='o', entities_subset=entities_subset[:10])

    # Smaller dataset with no entity list declared (no exception/warning expected).
    wn18rr = load_wn18rr()
    small_model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    small_model.fit(wn18rr['train'])
    evaluate_performance(wn18rr['test'][::100], small_model,
                         verbose=True, corrupt_side='o')
def test_select_best_model_ranking_random():
    """Random search over a TransE hyper-parameter grid with a callable lr sampler.

    Verifies that select_best_model_ranking honours max_combinations, samples
    lr values from the declared distribution, and reports valid test metrics.
    """
    X = load_wn18rr()
    model_class = TransE
    param_grid = {
        "batches_count": [50],
        "seed": 0,
        "epochs": [1],
        "k": [2, 50],
        "eta": [1],
        "loss": ["nll"],
        "loss_params": {},
        "embedding_model_params": {},
        "regularizer": [None],
        "regularizer_params": {},
        "optimizer": ["adagrad"],
        "optimizer_params": {
            # Callable: each trial draws lr = log(u), u ~ U(1.00001, 1.1),
            # so lr lies in [log(1.00001), log(1.1)].
            "lr": lambda: np.log(np.random.uniform(1.00001, 1.1))
        }
    }
    best_model, best_params, best_mrr_train, ranks_test, test_results, experimental_history = select_best_model_ranking(
        model_class, X['train'], X['valid'][::5], X['test'][::10], param_grid,
        max_combinations=10)

    assert best_params['k'] in (2, 50)
    # Upper bound is log(1.1), matching the sampler above (was erroneously log(100)).
    assert np.log(1.00001) <= best_params['optimizer_params']['lr'] <= np.log(1.1)

    # Exactly max_combinations trials, covering both k values.
    assert len(experimental_history) == 10
    assert set(i["model_params"]["k"] for i in experimental_history) == {2, 50}
    # Every sampled lr must fall inside the sampler's range.
    assert np.all([
        np.log(1.00001) <= i["model_params"]["optimizer_params"]["lr"] <= np.log(1.1)
        for i in experimental_history
    ])
    # All sampled configurations must be distinct.
    assert len(
        set(
            frozenset(_flatten_nested_keys(i["model_params"]).items())
            for i in experimental_history)) == 10

    assert set(
        test_results.keys()) == {"mrr", "mr", "hits_1", "hits_3", "hits_10"}
    assert all(r >= 0 for r in test_results.values())
    assert all(not np.isnan(r) for r in test_results.values())
def test_select_best_model_ranking_grid():
    """Exhaustive grid search (2 k-values x 2 lr-values = 4 combinations).

    Verifies that select_best_model_ranking explores the full grid, picks the
    better lr, and reports valid test metrics.
    """
    X = load_wn18rr()
    model_class = TransE
    param_grid = {
        "batches_count": [50],
        "seed": 0,
        "epochs": [1],
        "k": [2, 50],
        "eta": [1],
        "loss": ["nll"],
        "loss_params": {},
        "embedding_model_params": {},
        "regularizer": [None],
        "regularizer_params": {},
        "optimizer": ["adagrad"],
        "optimizer_params": {
            "lr": [1000.0, 0.0001]
        }
    }
    best_model, best_params, best_mrr_train, ranks_test, test_results, experimental_history = select_best_model_ranking(
        model_class, X['train'], X['valid'][::5], X['test'][::10], param_grid
    )

    assert best_params['k'] in (2, 50)
    # lr=1000.0 is pathological, so the sane value must win.
    assert best_params['optimizer_params']['lr'] == 0.0001

    # Full grid: 2 * 2 = 4 distinct combinations.
    assert len(experimental_history) == 4
    assert set(i["model_params"]["k"] for i in experimental_history) == {2, 50}
    assert set(i["model_params"]["optimizer_params"]["lr"]
               for i in experimental_history) == {1000.0, 0.0001}
    assert len(set(frozenset(_flatten_nested_keys(i["model_params"]).items())
                   for i in experimental_history)) == 4

    # Removed stray debug print of test_results.values().
    assert set(test_results.keys()) == {"mrr", "mr", "hits_1", "hits_3", "hits_10"}
    assert all(r >= 0 for r in test_results.values())
    assert all(not np.isnan(r) for r in test_results.values())
def test_wn18rr():
    """Sanity-check the WN18RR loader split sizes.

    Also exercises the entity/relation vocabulary computation over all splits.
    """
    wn18rr = load_wn18rr()

    ent_train = np.union1d(np.unique(wn18rr["train"][:, 0]),
                           np.unique(wn18rr["train"][:, 2]))
    ent_valid = np.union1d(np.unique(wn18rr["valid"][:, 0]),
                           np.unique(wn18rr["valid"][:, 2]))
    ent_test = np.union1d(np.unique(wn18rr["test"][:, 0]),
                          np.unique(wn18rr["test"][:, 2]))
    distinct_ent = np.union1d(np.union1d(ent_train, ent_valid), ent_test)
    # Fixed copy-paste bug: the relation vocabulary previously unioned the
    # train relations with themselves; it must span train, valid and test.
    distinct_rel = np.union1d(
        np.union1d(np.unique(wn18rr["train"][:, 1]),
                   np.unique(wn18rr["valid"][:, 1])),
        np.unique(wn18rr["test"][:, 1]))

    assert len(wn18rr['train']) == 86835
    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['valid']) == 3034 - 210
    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['test']) == 3134 - 210
def perform_test():
    """Train a tiny TransE on WN18RR and check the stored parameter shapes.

    After fit(), trained_model_params[0] holds entity embeddings and
    trained_model_params[1] holds relation embeddings; both must have one row
    per distinct entity/relation and k columns.
    """
    dataset = load_wn18rr()
    embedding_dim = 5

    all_entities = np.unique(
        np.concatenate([dataset['train'][:, 0], dataset['train'][:, 2]], 0))
    all_relations = np.unique(dataset['train'][:, 1])

    model = TransE(batches_count=100, seed=555, epochs=1, k=embedding_dim,
                   loss='multiclass_nll', loss_params={'margin': 5},
                   verbose=True, optimizer='sgd',
                   optimizer_params={'lr': 0.001})
    model.fit(dataset['train'])

    ent_params, rel_params = model.trained_model_params[:2]

    # One embedding row per entity and per relation.
    assert ent_params.shape[0] == len(all_entities)
    assert rel_params.shape[0] == len(all_relations)

    # Embedding width matches the requested k.
    assert ent_params.shape[1] == embedding_dim
    assert rel_params.shape[1] == embedding_dim