def test_evaluate_performance():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=10, k=150, eta=10,
                    loss='pairwise', loss_params={'margin': 5},
                    regularizer=None,
                    optimizer='adagrad', optimizer_params={'lr': 0.1},
                    verbose=True)
    model.fit(np.concatenate((X['train'], X['valid'])))

    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200], model=model,
                                 filter_triples=filter_triples, verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
def test_evaluate_with_ent_subset_large_graph():
    set_entity_threshold(1)
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=2, k=10, eta=1,
                    optimizer='sgd', optimizer_params={'lr': 1e-5},
                    loss='pairwise', loss_params={'margin': 0.5},
                    regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    all_nodes = set(X_filter[:, 0]).union(X_filter[:, 2])
    entities_subset = np.random.choice(list(all_nodes), 100, replace=False)

    ranks = evaluate_performance(X['test'][::10],
                                 model=model,
                                 filter_triples=X_filter,
                                 corrupt_side='o',
                                 use_default_protocol=False,
                                 entities_subset=list(entities_subset),
                                 verbose=True)

    # With only 100 corruption entities plus the true triple, no rank can exceed 101.
    assert np.sum(ranks > (100 + 1)) == 0, "No rank should be greater than 101"
    reset_entity_threshold()
def test_find_clusters():
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'x', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model = ComplEx(k=2, batches_count=2)
    model.fit(X)
    clustering_algorithm = DBSCAN(eps=1e-3, min_samples=1)

    labels = find_clusters(X, model, clustering_algorithm, mode='triple')
    assert np.array_equal(labels, np.array([0, 1, 2, 3, 4, 5, 6, 7]))

    labels = find_clusters(np.unique(X[:, 0]), model, clustering_algorithm, mode='entity')
    assert np.array_equal(labels, np.array([0, 1, 2, 3]))

    labels = find_clusters(np.unique(X[:, 1]), model, clustering_algorithm, mode='relation')
    assert np.array_equal(labels, np.array([0, 1]))

    labels = find_clusters(np.unique(X[:, 2]), model, clustering_algorithm, mode='entity')
    assert np.array_equal(labels, np.array([0, 1, 2, 3, 4]))

    with pytest.raises(ValueError):
        find_clusters(X, model, clustering_algorithm, mode='hah')
    with pytest.raises(ValueError):
        find_clusters(X, model, clustering_algorithm, mode='entity')
    with pytest.raises(ValueError):
        find_clusters(X, model, clustering_algorithm, mode='relation')
    with pytest.raises(ValueError):
        find_clusters(np.unique(X[:, 0]), model, clustering_algorithm, mode='triple')
def test_evaluate_performance_ranking_against_specified_entities():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10,
                    loss='nll', regularizer=None,
                    optimizer='adam', optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    entities_subset = np.concatenate([X['test'][::1000, 0], X['test'][::1000, 2]], 0)

    ranks = evaluate_performance(X['test'][::1000], model, X_filter,
                                 verbose=True,
                                 corrupt_side='s+o',
                                 use_default_protocol=True,
                                 entities_subset=entities_subset)
    ranks = ranks.reshape(-1)

    # Ranks are computed against the subset only, so none can exceed its size.
    assert np.sum(ranks > len(entities_subset)) == 0
def generate_model(X):
    X_train, X_test = train_test_split_no_unseen(X, test_size=100)
    print('Train set size: ', X_train.shape)
    print('Test set size: ', X_test.shape)

    model = ComplEx(batches_count=100,
                    seed=0,
                    epochs=10,
                    k=150,
                    eta=5,
                    optimizer='adam',
                    optimizer_params={'lr': 1e-3},
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={'p': 3, 'lambda': 1e-5},
                    verbose=True)

    # positives_filter = X
    tf.logging.set_verbosity(tf.logging.ERROR)

    model.fit(X_train, early_stopping=False)
    print("created the model")
    save_model(model, './best_model.pkl')
    return X_test
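# A minimal sketch of driving generate_model() above, assuming the load_wn18
# and restore_model helpers from AmpliGraph 1.x used elsewhere in this file.
from ampligraph.datasets import load_wn18
from ampligraph.latent_features import restore_model

X_test = generate_model(load_wn18()['train'])   # trains and pickles the model
model = restore_model('./best_model.pkl')       # reload it later for scoring
print(model.predict(X_test[:5]))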
def test_evaluate_performance_ranking_against_shuffled_all_entities():
    """Compare the MRR of the test set under the default protocol (ranking
    against all entities) with the MRR obtained when entities_subset is the
    full entity list in shuffled order. The two must be identical.
    """
    import random
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10,
                    loss='nll', regularizer=None,
                    optimizer='adam', optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))

    # random.shuffle() shuffles in place and returns None, so shuffle first,
    # then pass the shuffled list.
    entities_subset = list(model.ent_to_idx.keys())
    random.shuffle(entities_subset)

    ranks_all = evaluate_performance(X['test'][::1000], model, X_filter,
                                     verbose=True, corrupt_side='s,o')
    ranks_shuffled_ent = evaluate_performance(X['test'][::1000], model, X_filter,
                                              verbose=True, corrupt_side='s,o',
                                              entities_subset=entities_subset)
    assert mrr_score(ranks_all) == mrr_score(ranks_shuffled_ent)
def test_evaluate_performance_so_side_corruptions_with_filter():
    # Note: renamed from "..._without_filter" because the call below does pass
    # filter_triples.
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=5, k=200, eta=10,
                    loss='nll', regularizer=None,
                    optimizer='adam', optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][::20], model, X_filter,
                                 verbose=True,
                                 use_default_protocol=False,
                                 corrupt_side='s+o')
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)

    # "mrr is not np.Inf" is an identity check that always passes; use np.isinf.
    assert not np.isinf(mrr)
def test_fit_predict_wn18_ComplEx():
    X = load_wn18()
    model = ComplEx(batches_count=1, seed=555, epochs=5, k=100,
                    loss='pairwise', loss_params={'margin': 1},
                    regularizer='LP', regularizer_params={'lambda': 0.1, 'p': 2},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    model.fit(X['train'])
    y = model.predict(X['test'][:1], get_ranks=True)
    print(y)
def test_find_duplicates():
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'x', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model = ComplEx(k=2, batches_count=2)
    model.fit(X)

    entities = set('a b c d e f'.split())
    relations = set('x y'.split())

    def asserts(tol, dups, ent_rel, subspace):
        assert tol > 0.0
        assert len(dups) <= len(ent_rel)
        assert all(len(d) <= len(ent_rel) for d in dups)
        assert all(d.issubset(subspace) for d in dups)

    dups, tol = find_duplicates(X, model, mode='triple',
                                tolerance='auto', expected_fraction_duplicates=0.5)
    asserts(tol, dups, X, {tuple(x) for x in X})

    dups, tol = find_duplicates(X, model, mode='triple', tolerance=1.0)
    assert tol == 1.0
    asserts(tol, dups, X, {tuple(x) for x in X})

    dups, tol = find_duplicates(np.unique(X[:, 0]), model, mode='entity',
                                tolerance='auto', expected_fraction_duplicates=0.5)
    asserts(tol, dups, entities, entities)

    dups, tol = find_duplicates(np.unique(X[:, 2]), model, mode='entity',
                                tolerance='auto', expected_fraction_duplicates=0.5)
    asserts(tol, dups, entities, entities)

    dups, tol = find_duplicates(np.unique(X[:, 1]), model, mode='relation',
                                tolerance='auto', expected_fraction_duplicates=0.5)
    asserts(tol, dups, relations, relations)

    with pytest.raises(ValueError):
        find_duplicates(X, model, mode='hah')
    with pytest.raises(ValueError):
        find_duplicates(X, model, mode='entity')
    with pytest.raises(ValueError):
        find_duplicates(X, model, mode='relation')
    with pytest.raises(ValueError):
        find_duplicates(np.unique(X[:, 0]), model, mode='triple')
def test_evaluate_performance_filter_without_xtest():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=1, k=20, eta=10,
                    loss='nll', regularizer=None,
                    optimizer='adam', optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    # The filter deliberately does not contain X['test'].
    X_filter = np.concatenate((X['train'], X['valid']))
    ranks = evaluate_performance(X['test'][::1000], model, X_filter,
                                 verbose=True, corrupt_side='s,o')
    assert mrr_score(ranks) > 0
def kge(triples, kge_name, verbose):
    # Train/test split
    t_size = math.ceil(len(triples) * 0.2)
    X_train, X_test = train_test_split_no_unseen(triples, test_size=t_size)

    # Select the embedding model
    if kge_name == 'complex':
        model = ComplEx(batches_count=50,
                        epochs=300,
                        k=100,
                        eta=20,
                        optimizer='adam',
                        optimizer_params={'lr': 1e-4},
                        loss='multiclass_nll',
                        regularizer='LP',
                        regularizer_params={'p': 3, 'lambda': 1e-5},
                        seed=0,
                        verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    model.fit(X_train)

    # Embedding evaluation
    if verbose:
        filter_triples = np.concatenate((X_train, X_test))
        ranks = evaluate_performance(X_test,
                                     model=model,
                                     filter_triples=filter_triples,
                                     use_default_protocol=True,
                                     verbose=True)
        mrr = mrr_score(ranks)
        print("MRR: %.2f" % (mrr))
        mr = mr_score(ranks)
        print("MR: %.2f" % (mr))
        hits_10 = hits_at_n_score(ranks, n=10)
        print("Hits@10: %.2f" % (hits_10))
        hits_3 = hits_at_n_score(ranks, n=3)
        print("Hits@3: %.2f" % (hits_3))
        hits_1 = hits_at_n_score(ranks, n=1)
        print("Hits@1: %.2f" % (hits_1))
        print('''
        - Ampligraph example -
        MRR: 0.25
        MR: 4927.33
        Hits@10: 0.35
        Hits@3: 0.28
        Hits@1: 0.19
        ''')

    return model
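# A hypothetical invocation of kge() above, reusing the WN18 training triples
# as the input graph; 'complex' is the only kge_name this helper accepts, and
# with 300 epochs this is a long-running illustrative call, not a quick test.
from ampligraph.datasets import load_wn18

model = kge(load_wn18()['train'], kge_name='complex', verbose=False)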
def test_missing_entity_ComplEx():
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model = ComplEx(batches_count=1, seed=555, epochs=2, k=5)
    model.fit(X)

    # Predicting triples with unseen entities or relations must fail.
    with pytest.raises(ValueError):
        model.predict(['a', 'y', 'zzzzzzzzzzz'])
    with pytest.raises(ValueError):
        model.predict(['a', 'xxxxxxxxxx', 'e'])
    with pytest.raises(ValueError):
        model.predict(['zzzzzzzz', 'y', 'e'])
def train_complex(train_samples: np.ndarray):
    # Note: the annotation was `iter`, which is the builtin function, not a type.
    model = ComplEx(batches_count=100,
                    seed=0,
                    epochs=200,
                    k=150,
                    eta=5,
                    optimizer='adam',
                    optimizer_params={'lr': 1e-3},
                    loss='multiclass_nll',
                    regularizer='LP',
                    regularizer_params={'p': 3, 'lambda': 1e-5},
                    verbose=True)
    model.fit(train_samples, early_stopping=False)
    return model
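# A minimal sketch of calling train_complex() above on WN18; with epochs=200
# this is a long-running illustrative call rather than a quick test.
from ampligraph.datasets import load_wn18

X = load_wn18()
model = train_complex(X['train'])
print(model.predict(X['test'][:5]))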
def test_discover_facts():
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model = ComplEx(batches_count=1, seed=555, epochs=2, k=5)

    # discover_facts() requires a fitted model.
    with pytest.raises(ValueError):
        discover_facts(X, model)

    model.fit(X)

    with pytest.raises(ValueError):
        discover_facts(X, model, strategy='error')
    with pytest.raises(ValueError):
        discover_facts(X, model, strategy='random_uniform', target_rel='error')
def test_fit_predict_ComplEx():
    model = ComplEx(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 1},
                    regularizer='LP', regularizer_params={'lambda': 0.1, 'p': 2},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)
    y_pred, _ = model.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]), get_ranks=True)
    print(y_pred)

    # The observed triple should score higher than the unobserved one.
    assert y_pred[0] > y_pred[1]
def test_large_graph_mode_adam():
    set_entity_threshold(10)
    X = load_wn18()
    model = ComplEx(batches_count=100, seed=555, epochs=1, k=50,
                    loss='multiclass_nll', loss_params={'margin': 5},
                    verbose=True,
                    optimizer='adam', optimizer_params={'lr': 0.001})
    try:
        # Large-graph mode supports only SGD, so fit() is expected to raise
        # here; the test just prints the error message.
        model.fit(X['train'])
    except Exception as e:
        print(str(e))
    reset_entity_threshold()
def test_retrain():
    model = ComplEx(batches_count=1, seed=555, epochs=20, k=10,
                    loss='pairwise', loss_params={'margin': 1},
                    regularizer='LP', regularizer_params={'lambda': 0.1, 'p': 2},
                    optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)
    y_pred_1st, _ = model.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]), get_ranks=True)
    model.fit(X)
    y_pred_2nd, _ = model.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]), get_ranks=True)
    np.testing.assert_array_equal(y_pred_1st, y_pred_2nd)
def test_large_graph_mode():
    set_entity_threshold(10)
    X = load_wn18()
    model = ComplEx(batches_count=100, seed=555, epochs=1, k=50,
                    loss='multiclass_nll', loss_params={'margin': 5},
                    verbose=True,
                    optimizer='sgd', optimizer_params={'lr': 0.001})
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']), axis=0)
    evaluate_performance(X['test'][::1000], model, X_filter,
                         verbose=True, corrupt_side='s,o')

    y = model.predict(X['test'][:1])
    print(y)
    reset_entity_threshold()
from ampligraph.datasets import load_wn18
from ampligraph.latent_features import ComplEx
from ampligraph.evaluation import evaluate_performance, hits_at_n_score, mrr_score

X = load_wn18()

model = ComplEx(batches_count=10, seed=0, epochs=20, k=50, eta=2,
                loss="nll", optimizer="adam", optimizer_params={"lr": 0.01})
model.fit(X['train'])

y_pred = model.predict(X['test'][:5, ])

# Scores are unbounded; squash them into [0, 1] with the logistic sigmoid.
from scipy.special import expit
print(expit(y_pred))

ranks = evaluate_performance(X['test'][:10], model=model)
print(ranks)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

import matplotlib.pyplot as plt
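# The dangling matplotlib import above suggests a truncated visualization step.
# Below is a minimal sketch of one plausible continuation, assuming
# scikit-learn's PCA and AmpliGraph 1.x's model.get_embeddings(); the choice of
# entities to plot is illustrative.
import numpy as np
from sklearn.decomposition import PCA

entities = np.unique(X['train'][:200, 0])
embeddings = model.get_embeddings(entities, embedding_type='entity')
points = PCA(n_components=2).fit_transform(embeddings)

plt.scatter(points[:, 0], points[:, 1], s=10)
plt.title('Entity embeddings (2D PCA projection)')
plt.show()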
model = ComplEx(  # NOTE: the leading constructor arguments were elided in the original snippet
                epochs=200,
                k=150,
                eta=5,
                optimizer='adam',
                optimizer_params={'lr': 1e-3},
                loss='multiclass_nll',
                regularizer='LP',
                regularizer_params={'p': 3, 'lambda': 1e-5},
                verbose=True)

positives_filter = X

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model.fit(data['train'], early_stopping=False)

# --- 4. Saving and restoring a model ---

from ampligraph.latent_features import save_model, restore_model

save_model(model, './best_model.pkl')
del model
model = restore_model('./best_model.pkl')

if model.is_fitted:
    print('The model is fit!')
model = ComplEx(  # NOTE: the leading constructor arguments were elided in the original snippet
                eta=5,
                optimizer='adam',
                optimizer_params={'lr': 1e-3},
                loss='multiclass_nll',
                regularizer='LP',
                regularizer_params={'p': 3, 'lambda': 1e-5},
                verbose=True)

positives_filter = X

tf.logging.set_verbosity(tf.logging.ERROR)

print("Model training started...")
model.fit(X_train, early_stopping=False)

print("Save the model...")
save_model(model, model_name_path=out_embeddings_file)

print("Evaluating the model...")
ranks = evaluate_performance(X_test,
                             model=model,
                             filter_triples=positives_filter,
                             use_default_protocol=True,
                             verbose=True)

mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))
hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
# NOTE: the call closed by `ignore_index=True)` was elided in the original snippet.

train_y = to_categorical(train_y, dtype=np.int32)
test_y = to_categorical(test_y, dtype=np.int32)
positives_filter = positives_filter.to_numpy(dtype=np.int32)
print("Shape of train_y: %s; Shape of test_y: %s; Shape of positives_filter: %s"
      % (train_y.shape, test_y.shape, positives_filter.shape))

# Feature scaling: normalize the dataset by generating embeddings
print("\nFeature Scaling: Embeddings Generation")
embed_dim = 100
embeds_model = ComplEx(k=embed_dim, verbose=True)
# Suppress TensorFlow messages below the ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
embeds_model.fit(positives_filter)

embeds_source = embeds_model.get_embeddings(positives_filter[:, 0], embedding_type='entity')
embeds_dest = embeds_model.get_embeddings(positives_filter[:, 2], embedding_type='entity')
embeds = np.concatenate((embeds_source, embeds_dest), axis=1)

train_sz = train_X_temp.shape[0]
train_X, test_X = embeds[:train_sz, :], embeds[train_sz:, :]

# Reshape to (samples, n_timesteps, features_per_timestep). n_timesteps=4
# because each ComplEx entity embedding is 2 * embed_dim wide (real and
# imaginary parts), and source + destination together give 4 * embed_dim.
train_X = train_X.reshape(train_X.shape[0], 4, embed_dim)
test_X = test_X.reshape(test_X.shape[0], 4, embed_dim)
K.clear_session()  # Kills the current TF computation graph & creates a new one

if mdl[j] == "ComplEx":
    model = ComplEx(verbose=True)
elif mdl[j] == "ConvKB":
    model = ConvKB(verbose=True)
elif mdl[j] == "DistMult":
    model = DistMult(verbose=True)
elif mdl[j] == "HolE":
    model = HolE(verbose=True)
elif mdl[j] == "TransE":
    model = TransE(verbose=True)
elif mdl[j] == "RandomBaseline":
    model = RandomBaseline(verbose=True)

# Suppress TensorFlow messages below the ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

model.fit(train_X)

# Save the model at its best-performance point
save_model(model, 'best_ampliGraph_model.pkl')
del model  # Delete the older model

# Load the recently saved best-performance model
model = restore_model('./best_ampliGraph_model.pkl')
if model.is_fitted:
    print('The model is fit!')
else:
    print('The model is not fit! Did you skip a step?')

# TRAINING: evaluate the model's performance
test_X = filter_unseen_entities(test_X, model, verbose=True, strict=False)
test_y = test_X[:, 1]
scores_validtn = evaluate_performance(test_X,
# ... (remaining arguments elided in the original snippet)
# Client and the SPARQL endpoint
endpoint = 'http://10.161.202.101:8890/sparql/'
port = 8890
output_format = HttpClientDataFormat.PANDAS_DF
client = HttpClient(endpoint_url=endpoint,
                    port=port,
                    return_format=output_format,
                    timeout=timeout,
                    default_graph_uri=default_graph_url,
                    max_rows=max_rows)

# Get all triples where the object is a URI
dataset = graph.feature_domain_range(s, p, o).filter({o: ['isURI']})

# Execute
df = dataset.execute(client, return_format=output_format)

# Train/test split, then build a ComplEx model from the ampligraph library
triples = df.to_numpy()
X_train, X_test = train_test_split_no_unseen(triples, test_size=10000)

# Use the ComplEx model to build the embedding
model = ComplEx(batches_count=50,
                epochs=300,
                k=100,
                eta=20,
                optimizer='adam',
                optimizer_params={'lr': 1e-4},
                loss='multiclass_nll',
                regularizer='LP',
                regularizer_params={'p': 3, 'lambda': 1e-5},
                seed=0,
                verbose=True)
model.fit(X_train)

# Evaluate the embedding model
filter_triples = np.concatenate((X_train, X_test))
ranks = evaluate_performance(X_test,
                             model=model,
                             filter_triples=filter_triples,
                             use_default_protocol=True,
                             verbose=True)
mr = mr_score(ranks)
mrr = mrr_score(ranks)
def test_query_topn():
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'x', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e'],
                  ['a', 'z', 'f'],
                  ['c', 'z', 'f'],
                  ['b', 'z', 'f']])
    model = ComplEx(k=2, batches_count=2)

    with pytest.raises(ValueError):  # Model not fitted
        query_topn(model, top_n=2)

    model.fit(X)

    # Invalid argument combinations should raise ValueError.
    with pytest.raises(ValueError):
        query_topn(model, top_n=2)
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, relation='y')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, tail='e')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', relation='y', tail='e')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='xx', relation='y')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', relation='yakkety')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', tail='sax')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', relation='x', rels_to_consider=['y', 'z'])
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', tail='f', rels_to_consider=['y', 'z', 'error'])
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', tail='e', rels_to_consider='y')
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', relation='x', ents_to_consider=['zz', 'top'])
    with pytest.raises(ValueError):
        query_topn(model, top_n=2, head='a', tail='e', ents_to_consider=['a', 'b'])

    subj, pred, obj, top_n = 'a', 'x', 'e', 3

    Y, S = query_topn(model, top_n=top_n, head=subj, relation=pred)
    assert len(Y) == len(S)
    assert len(Y) == top_n
    assert np.all(Y[:, 0] == subj)
    assert np.all(Y[:, 1] == pred)

    Y, S = query_topn(model, top_n=top_n, relation=pred, tail=obj)
    assert np.all(Y[:, 1] == pred)
    assert np.all(Y[:, 2] == obj)

    ents_to_con = ['a', 'b', 'c', 'd']
    Y, S = query_topn(model, top_n=top_n, relation=pred, tail=obj,
                      ents_to_consider=ents_to_con)
    assert np.all([x in ents_to_con for x in Y[:, 0]])

    rels_to_con = ['y', 'x']
    Y, S = query_topn(model, top_n=10, head=subj, tail=obj,
                      rels_to_consider=rels_to_con)
    assert np.all([x in rels_to_con for x in Y[:, 1]])

    # Scores must come back sorted in descending order.
    Y, S = query_topn(model, top_n=10, relation=pred, tail=obj)
    assert all(S[i] >= S[i + 1] for i in range(len(S) - 1))
def main():
    # Load the dataset:
    # X = load_wn18()
    X = load_fb15k_237()

    modify_flag = False

    # Initialize a ComplEx neural embedding model with a pairwise loss function.
    model = ComplEx(
        batches_count=10,
        seed=0,
        epochs=30,
        k=150,
        eta=10,
        # Use the adam optimizer with learning rate 1e-3
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        # Use pairwise loss with margin 0.5
        loss='pairwise',
        loss_params={'margin': 0.5},
        # Use an L2 regularizer with regularizer weight 1e-5
        regularizer='LP',
        regularizer_params={'p': 2, 'lambda': 1e-5},
        # Enable stdout messages (set to False to silence)
        verbose=True)  # , modify_flag=modify_flag)

    if False:
        # Ground-truth params (not tried yet):
        # k: 350; epochs: 4000; eta: 30; loss: self_adversarial;
        # loss_params: alpha: 1, margin: 0.5; optimizer: adam;
        # optimizer_params: lr: 0.0001; seed: 0; batches_count: 50
        model = ComplEx(
            batches_count=50,
            seed=0,
            epochs=4000,
            k=350,
            eta=30,
            optimizer='adam',
            optimizer_params={'lr': 1e-4},
            loss='self_adversarial',
            loss_params={'margin': 0.5, 'alpha': 1},
            regularizer='LP',
            regularizer_params={'p': 2, 'lambda': 1e-5},
            verbose=True,
            modify_flag=modify_flag)

    # For evaluation, we use a filter to exclude positive statements
    # created by the corruption procedure. Here we define the filter set
    # by concatenating all the positives.
    filter = np.concatenate((X['train'], X['valid'], X['test']))

    # Fit the model on the training set, validating on the validation set.
    model.fit(X['train'],
              early_stopping=True,
              early_stopping_params={
                  'x_valid': X['valid'],          # validation set
                  'criteria': 'hits10',           # use Hits@10 for early stopping
                  'burn_in': 100,                 # early stopping kicks in after 100 epochs
                  'check_interval': 20,           # validate every 20th epoch
                  'stop_interval': 5,             # stop after 5 consecutive bad validation checks
                  'x_filter': filter,             # filter out positives
                  'corruption_entities': 'all',   # corrupt using all entities
                  'corrupt_side': 's+o'           # corrupt subject and object (but not both at once)
              })

    # Run the evaluation procedure on the test set (with filtering).
    # To disable filtering: filter_triples=None
    # Usually we corrupt subject and object sides separately and compute ranks.
    ranks = evaluate_performance(
        X['test'],
        model=model,
        filter_triples=filter,
        use_default_protocol=True,  # corrupt subj and obj separately while evaluating
        verbose=True)

    # Compute and print metrics:
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("MRR: %f, Hits@10: %f" % (mrr, hits_10))
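# Standard entry-point guard (an assumed addition; the original snippet defines
# main() but never calls it).
if __name__ == '__main__':
    main()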