def test_evaluate_performance_TransE():
    """Train TransE on the ``load_wn18()`` data and print ranking metrics.

    The model is fitted on the concatenation of the train and valid
    splits.  The first 200 test triples are then passed to
    ``evaluate_performance`` with train + valid + test supplied as
    ``filter_triples``.  The ranks, MRR and Hits@10 are printed; nothing
    is asserted, so the test only fails if one of the calls raises.
    """
    dataset = load_wn18()
    training_triples = np.concatenate((dataset['train'], dataset['valid']))
    known_triples = np.concatenate(
        (dataset['train'], dataset['valid'], dataset['test']))

    model = TransE(
        batches_count=10,
        seed=0,
        epochs=100,
        k=100,
        eta=5,
        optimizer_params={'lr': 0.1},
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
    )
    model.fit(training_triples)

    ranks = evaluate_performance(
        dataset['test'][:200],
        model=model,
        filter_triples=known_triples,
        verbose=True,
    )
    # Variant without filter_triples, kept for reference:
    # ranks = evaluate_performance(X['test'][:200], model=model)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)

    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
def test_fit_predict_transE():
    """Fit TransE on a tiny hand-written graph and compare two predictions.

    ``['f', 'y', 'e']`` is one of the training triples while
    ``['b', 'y', 'd']`` is not; the test asserts that the prediction for
    the former is strictly greater than the prediction for the latter.
    """
    training_triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'y', 'a'],
        ['a', 'y', 'd'],
        ['c', 'y', 'd'],
        ['b', 'y', 'c'],
        ['f', 'y', 'e'],
    ])
    seen_triple = ['f', 'y', 'e']
    unseen_triple = ['b', 'y', 'd']

    model = TransE(
        batches_count=1,
        seed=555,
        epochs=20,
        k=10,
        loss='pairwise',
        loss_params={'margin': 5},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    model.fit(training_triples)

    predictions = model.predict(np.array([seen_triple, unseen_triple]))
    print(predictions)
    assert predictions[0] > predictions[1]
def test_evaluate_performance_default_protocol_without_filter():
    """Compare separate 's' / 'o' corruption with joint 's,o' corruption.

    A TransE model is trained for one epoch on the train split returned
    by ``load_wn18()``.  Every 100th test triple is then evaluated three
    times without ``filter_triples``: with ``corrupt_side='o'``, with
    ``corrupt_side='s'`` (the two rank lists are pooled), and with
    ``corrupt_side='s,o'``.  The test asserts that the mean rank of the
    pooled ranks equals the mean rank of the joint run and that the joint
    MRR is not infinite.  All metrics are printed along the way.
    """
    # Imported locally, as in the original test, so the block does not
    # rely on the module-level imports providing mr_score.
    from ampligraph.evaluation import evaluate_performance
    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score

    wn18 = load_wn18()
    model = TransE(
        batches_count=10,
        seed=0,
        epochs=1,
        k=50,
        eta=10,
        verbose=True,
        embedding_model_params={'normalize_ent_emb': False, 'norm': 1},
        loss='self_adversarial',
        loss_params={'margin': 1, 'alpha': 0.5},
        optimizer='adam',
        optimizer_params={'lr': 0.0005},
    )
    model.fit(wn18['train'])

    test_triples = wn18['test'][::100]

    # Objects first, then subjects; the ranks of both runs are pooled.
    ranks_sep = []
    ranks = evaluate_performance(test_triples, model, verbose=True,
                                 corrupt_side='o')
    ranks_sep.extend(ranks)
    ranks = evaluate_performance(test_triples, model, verbose=True,
                                 corrupt_side='s')
    ranks_sep.extend(ranks)

    print('----------EVAL WITHOUT FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MAR:', mr_sep)
    print('Mrr:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))

    ranks = evaluate_performance(test_triples, model, verbose=True,
                                 corrupt_side='s,o')

    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MAR:', mr_joint)
    print('Mrr:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))

    np.testing.assert_equal(mr_sep, mr_joint)
    # The original `mrr_joint is not np.Inf` compared object identity, so
    # it passed even for an infinite value (and `np.Inf` no longer exists
    # in NumPy 2.0).  Compare by value instead.
    assert not np.isinf(mrr_joint)
def train_transe(train_samples: iter):
    """Fit a TransE model with a fixed configuration and return it.

    Args:
        train_samples: Training data handed unchanged to ``model.fit``.

    Returns:
        The ``TransE`` instance after ``fit`` has been called with
        ``early_stopping=False``.
    """
    hyperparameters = dict(
        batches_count=100,
        seed=0,
        epochs=200,
        k=150,
        eta=5,
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        loss='multiclass_nll',
        regularizer='LP',
        regularizer_params={'p': 3, 'lambda': 1e-5},
        verbose=True,
    )
    model = TransE(**hyperparameters)
    model.fit(train_samples, early_stopping=False)
    return model
def test_fit_predict_TransE_early_stopping_without_filter():
    """Fit TransE with an early-stopping configuration and print a prediction.

    The model is fitted on the train split returned by ``load_wn18()``.
    Every 100th validation triple is supplied as ``x_valid`` and no
    filter is included in the configuration.  The prediction for the
    first test triple is printed; nothing is asserted.
    """
    dataset = load_wn18()

    stopping_config = {
        'x_valid': dataset['valid'][::100],
        'criteria': 'mrr',
        'stop_interval': 2,
        'burn_in': 1,
        'check_interval': 2,
    }

    model = TransE(
        batches_count=1,
        seed=555,
        epochs=7,
        k=50,
        loss='pairwise',
        loss_params={'margin': 5},
        verbose=True,
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    # Arguments stay positional, exactly as in the original call;
    # presumably they map to `early_stopping` and `early_stopping_params`
    # (confirm against the `fit` signature).
    model.fit(dataset['train'], True, stopping_config)

    prediction = model.predict(dataset['test'][:1])
    print(prediction)
def perform_test():
    """Fit TransE on ``load_wn18rr()`` train triples and check parameter shapes.

    Asserts that the first two entries of ``model.trained_model_params``
    have one row per unique entity / relation seen in the training
    triples and ``k`` columns each.
    """
    dataset = load_wn18rr()
    train = dataset['train']
    k = 5

    # Entities are drawn from columns 0 and 2, relations from column 1.
    subjects_and_objects = np.concatenate([train[:, 0], train[:, 2]], 0)
    unique_entities = np.unique(subjects_and_objects)
    unique_relations = np.unique(train[:, 1])

    model = TransE(
        batches_count=100,
        seed=555,
        epochs=1,
        k=k,
        loss='multiclass_nll',
        loss_params={'margin': 5},
        verbose=True,
        optimizer='sgd',
        optimizer_params={'lr': 0.001},
    )
    model.fit(train)

    # verify ent and rel shapes
    entity_count = len(unique_entities)
    relation_count = len(unique_relations)
    assert model.trained_model_params[0].shape[0] == entity_count
    assert model.trained_model_params[1].shape[0] == relation_count

    # verify k
    assert model.trained_model_params[0].shape[1] == k
    assert model.trained_model_params[1].shape[1] == k
def test_evaluate_performance_too_many_entities_warning():
    """Check when ``evaluate_performance`` emits a ``UserWarning``.

    With a model trained on ``load_yago3_10()`` data, a warning is
    expected both when no ``entities_subset`` is given and when a
    50,000-entity subset is given.  A 10-entity subset, and a model
    trained on the smaller ``load_wn18rr()`` data with no subset, are run
    outside ``pytest.warns`` and are only expected not to raise.
    """
    yago = load_yago3_10()
    yago_model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1,
                        verbose=True)
    yago_model.fit(yago['train'])
    yago_test = yago['test'][::100]

    # no entity list declared
    with pytest.warns(UserWarning):
        evaluate_performance(yago_test, yago_model, verbose=True,
                             corrupt_side='o')

    # with larger than threshold entity list
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit
        # value to comply with linting and thus avoiding exporting unused
        # global variable.
        train_subjects = np.unique(yago["train"][:, 0])
        train_objects = np.unique(yago["train"][:, 2])
        entities_subset = np.union1d(train_subjects, train_objects)[:50000]
        evaluate_performance(yago_test, yago_model, verbose=True,
                             corrupt_side='o',
                             entities_subset=entities_subset)

    # with small entity list (no exception expected)
    evaluate_performance(yago_test, yago_model, verbose=True,
                         corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # with smaller dataset, no entity list declared (no exception expected)
    X_wn18rr = load_wn18rr()
    model_wn18 = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1,
                        verbose=True)
    model_wn18.fit(X_wn18rr['train'])
    evaluate_performance(X_wn18rr['test'][::100], model_wn18, verbose=True,
                         corrupt_side='o')
# Train and save two TransE models on X_train (defined outside this
# fragment): one with k=70 and one with k=3.
model = TransE(verbose=True, k=70, epochs=40)

# Disabled alternative configuration, kept for reference:
#
# model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10,
#                 # Use adam optimizer with learning rate 1e-3
#                 optimizer='adam', optimizer_params={'lr': 1e-3},
#                 # Use pairwise loss with margin 0.5
#                 loss='pairwise', loss_params={'margin': 0.5},
#                 # Use L2 regularizer with regularizer weight 1e-5
#                 regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5},
#                 # Enable stdout messages (set to false if you don't want to display)
#                 verbose=True)

print("Training...")
x_orig = load_wn18()

model.fit(X_train)
save_model(model, model_name_path=ke_model_path)

# The second model is saved next to the first, with a '2' suffix on the path.
model2 = TransE(verbose=True, k=3, epochs=40)
model2.fit(X_train)
save_model(model2, model_name_path=ke_model_path + '2')

# Disabled evaluation, kept for reference:
#
# filter_triples = np.concatenate((X_train, X_valid))
# filter = np.concatenate((X['train'], X['valid'], X['test']))
# ranks = evaluate_performance(X['test'],
#                              model=model,
#                              filter_triples=filter,
#                              use_default_protocol=True,  # corrupt subj and obj separately while evaluating
#                              verbose=True)
##### RUN ONLY ONCE ###### ################################## # Ampligraph embedding model (train new model) ################################### model = TransE(batches_count=100, seed=555, epochs=100, k=100, loss='pairwise', optimizer='sgd', loss_params={ 'margin': 1.0, 'normalize_ent_emb': True }, verbose=True) model.fit(kg_triples.as_numpy_array()) # Save model for later usage, the it can be reloaded using load_model(os.path.join(experiment_dir,'model_transE.pkl')) save_model(model, os.path.join(out_dir, 'imdb_transE.pkl')) ##################### End ########### ALTERNATIVE ##################### ## OR ## Relaoad a pretrained model ############################# # Restore models trained using our modified restore model function ###################### # model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl')) # Get vectors # print(model.ent_to_idx.items()) # print(target_entities.get_entities()[:30]) missing = list(
epochs=200, k=150, eta=5, optimizer='adam', optimizer_params={'lr':1e-3}, loss='multiclass_nll', regularizer='LP', regularizer_params={'p':3, 'lambda':1e-5}, verbose=True) positives_filter = X import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) model.fit(data['train'], early_stopping = False) """--- # 4. Saving and restoring a model """ from ampligraph.latent_features import save_model, restore_model save_model(model, './best_model.pkm') del model model = restore_model('./best_model.pkm') if model.is_fitted: print('The model is fit!')