def predict_missing_links(train_file_path, evaluation_file_path, model_path, tensorboard_visualizations_path):
    """Load a knowledge graph from CSV, train (or restore) an embedding model,
    report ranking metrics on a held-out split, and score the evaluation triples.

    Parameters
    ----------
    train_file_path : str
        CSV of training triples, comma-separated.
    evaluation_file_path : str
        CSV of triples whose links should be predicted/scored.
    model_path : str
        Where the trained model is saved to / restored from.
    tensorboard_visualizations_path : str or None
        When truthy, a TensorBoard projector export is written there.
    """
    # Load the training graph and the triples to score.
    graph = load_from_csv('.', train_file_path, sep=',')
    evaluation_samples = load_from_csv('.', evaluation_file_path, sep=',')
    print('Head of the loaded graph: ')
    print(graph[:5])

    # Hold out a test subset from the training graph.
    train_samples, test_samples = split(graph)
    print(
        f'Divided into train and test subsets with shapes {train_samples.shape} and {test_samples.shape} respectively.'
    )

    # Reuse a previously trained model when one exists on disk;
    # otherwise train a fresh one and persist it.
    if os.path.isfile(model_path):
        model = restore_model(model_path)
    else:
        model = train_transe(train_samples)  # train_complex(train_samples)
        save_model(model, model_path)

    # Report ranking metrics on the held-out split.
    metrics = compute_metrics(model, train_samples, test_samples)
    print(f'{"metric":10s}: {"score":5s}')
    for metric, score in metrics.items():
        print(f'{metric:10s}: {score:<5.2f}')

    # Score the evaluation triples and print a human-readable summary.
    scores, ranks = score_samples(model, evaluation_samples, train_samples)
    evaluation_summary = summarize(scores, evaluation_samples, ranks)
    print(evaluation_summary)

    # Optional TensorBoard projector export of the learned embeddings.
    if tensorboard_visualizations_path:
        os.makedirs(tensorboard_visualizations_path, exist_ok=True)
        create_tensorboard_visualizations(model, tensorboard_visualizations_path)
def test_convkb_save_restore():
    """ConvKB predictions must be identical after a save/restore round-trip.

    Trains a tiny ConvKB model on WN18, scores 10 test triples, saves the
    model, restores it into a fresh object, and checks the scores match.
    """
    model = ConvKB(batches_count=2,
                   seed=22,
                   epochs=1,
                   k=10,
                   eta=1,
                   embedding_model_params={
                       'num_filters': 16,
                       'filter_sizes': [1],
                       'dropout': 0.0,
                       'is_trainable': True
                   },
                   optimizer='adam',
                   optimizer_params={'lr': 0.001},
                   loss='pairwise',
                   loss_params={},
                   verbose=True)
    X = load_wn18()
    model.fit(X['train'])
    y1 = model.predict(X['test'][:10])
    save_model(model, 'convkb.tmp')
    # Drop the in-memory model so the comparison can only come from disk.
    del model
    try:
        model = restore_model('convkb.tmp')
        y2 = model.predict(X['test'][:10])
        assert np.all(y1 == y2)
    finally:
        # FIX: remove the temp file even when the assertion fails, so a
        # failing run does not leave 'convkb.tmp' behind for the next test.
        os.remove('convkb.tmp')
def test_conve_evaluation_protocol():
    """ConvE predictions must survive a save/restore round-trip on WN18.

    Trains a small low-memory ConvE model, scores 5 test triples, saves and
    restores the model, and checks the restored scores are identical.
    """
    X = load_wn18()
    model = ConvE(batches_count=200,
                  seed=22,
                  epochs=1,
                  k=10,
                  embedding_model_params={
                      'conv_filters': 16,
                      'conv_kernel_size': 3
                  },
                  optimizer='adam',
                  optimizer_params={'lr': 0.01},
                  loss='bce',
                  loss_params={},
                  regularizer=None,
                  regularizer_params={
                      'p': 2,
                      'lambda': 1e-5
                  },
                  verbose=True,
                  low_memory=True)
    model.fit(X['train'])
    y1 = model.predict(X['test'][:5])
    save_model(model, 'model.tmp')
    # Drop the in-memory model so predictions can only come from the restore.
    del model
    try:
        model = restore_model('model.tmp')
        y2 = model.predict(X['test'][:5])
        assert np.all(y1 == y2)
    finally:
        # FIX: clean up the temp file even if the assertion fails, so a
        # failed run does not leave 'model.tmp' behind.
        os.remove('model.tmp')
def _evaluate_kge(triples, kge_name, epochs, batch_size, seed, verbose):
    """Hold out 20% of `triples`, train a throwaway model, and print
    filtered ranking metrics (MRR, MR, Hits@{10,3,1}) plus reference values."""
    # Train/test split that keeps every entity seen during training.
    t_size = math.ceil(len(triples) * 0.2)
    X_train, X_test = train_test_split_no_unseen(triples, test_size=t_size)
    eval_model = select_kge(kge_name, batch_size, epochs, seed, verbose)
    eval_model.fit(X_train)
    # Filter known-true triples out of the corruption ranking.
    filter_triples = np.concatenate((X_train, X_test))
    ranks = evaluate_performance(X_test,
                                 model=eval_model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=True)
    mrr = mrr_score(ranks)
    print("MRR: %.2f" % (mrr))
    mr = mr_score(ranks)
    print("MR: %.2f" % (mr))
    hits_10 = hits_at_n_score(ranks, n=10)
    print("Hits@10: %.2f" % (hits_10))
    hits_3 = hits_at_n_score(ranks, n=3)
    print("Hits@3: %.2f" % (hits_3))
    hits_1 = hits_at_n_score(ranks, n=1)
    print("Hits@1: %.2f" % (hits_1))
    # Reference numbers from the AmpliGraph example, for comparison.
    print(''' - Ampligraph example - MRR: 0.25 MR: 4927.33 Hits@10: 0.35 Hits@3: 0.28 Hits@1: 0.19 ''')


def kge(triples, kge_name, epochs, batch_size, learning_rate, seed, verbose):
    """Train a knowledge-graph-embedding model, or restore a cached one.

    Parameters
    ----------
    triples : sequence
        Training triples (converted to a numpy array before fitting).
    kge_name : str
        Which embedding model to build (passed to `select_kge`).
    epochs, batch_size, learning_rate, seed : training hyper-parameters.
        NOTE(review): `learning_rate` is accepted but never used here —
        presumably consumed inside `select_kge` in a fuller version; confirm.
    verbose : bool
        When True, also runs a held-out evaluation before the final fit.

    Returns
    -------
    The fitted (or restored) model.
    """
    # BUG FIX: the original did `kge_name = parsed_args.kge`, silently
    # discarding the `kge_name` argument and depending on a global
    # `parsed_args`; the parameter is now honoured.
    kge_model_savepath = './temp/ampligraph.model'  # no placeholders -> plain string
    if os.path.isfile(kge_model_savepath):
        # A cached model exists: restore it instead of retraining.
        return restore_model(model_name_path=kge_model_savepath)

    # Embedding evaluation (only in verbose mode, as in the original).
    if verbose:
        _evaluate_kge(triples, kge_name, epochs, batch_size, seed, verbose)

    model = select_kge(kge_name, batch_size, epochs, seed, verbose)
    print('Training...')
    model.fit(np.array(triples))
    save_model(model, model_name_path=kge_model_savepath)
    return model
def test_save_and_restore_model():
    """Each model class must round-trip through save_model/restore_model.

    For ComplEx, TransE and DistMult: fit on a toy graph, save, restore,
    and verify params, fitted state, vocabularies, trained weights,
    predictions (with ranks) and embeddings all match the original.
    """
    models = ('ComplEx', 'TransE', 'DistMult')
    for model_name in models:
        module = importlib.import_module("ampligraph.latent_features.models")
        print('Doing save/restore testing for model class: ', model_name)
        class_ = getattr(module, model_name)
        model = class_(batches_count=2,
                       seed=555,
                       epochs=20,
                       k=10,
                       optimizer='adagrad',
                       optimizer_params={'lr': 0.1})
        X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                      ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                      ['b', 'y', 'c'], ['f', 'y', 'e']])
        model.fit(X)
        example_name = 'helloworld.pkl'
        save_model(model, model_name_path=example_name)
        loaded_model = restore_model(model_name_path=example_name)
        # FIX: identity check for None (`is not None`, not `!= None`).
        assert loaded_model is not None
        assert loaded_model.all_params == model.all_params
        assert loaded_model.is_fitted == model.is_fitted
        assert loaded_model.ent_to_idx == model.ent_to_idx
        assert loaded_model.rel_to_idx == model.rel_to_idx
        # FIX: explicit length check + zip instead of an index loop, so a
        # length mismatch fails loudly rather than via IndexError.
        assert len(loaded_model.trained_model_params) == len(model.trained_model_params)
        for loaded_params, orig_params in zip(loaded_model.trained_model_params,
                                              model.trained_model_params):
            npt.assert_array_equal(loaded_params, orig_params)
        y_pred_before, _ = model.predict(np.array([['f', 'y', 'e'],
                                                   ['b', 'y', 'd']]),
                                         get_ranks=True)
        y_pred_after, _ = loaded_model.predict(np.array([['f', 'y', 'e'],
                                                         ['b', 'y', 'd']]),
                                               get_ranks=True)
        npt.assert_array_equal(y_pred_after, y_pred_before)
        npt.assert_array_equal(
            loaded_model.get_embeddings(['a', 'b'], embedding_type='entity'),
            model.get_embeddings(['a', 'b'], embedding_type='entity'))
        os.remove(example_name)
def test_conve_fit_predict_save_restore():
    """ConvE must produce identical predictions after save/restore.

    Fits a tiny ConvE model on a toy graph, predicts two unseen triples,
    round-trips the model through disk, and compares the predictions.
    """
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    X_test = np.array([['f', 'y', 'a'], ['f', 'y', 'b']])
    model = ConvE(batches_count=1,
                  seed=22,
                  epochs=1,
                  k=10,
                  embedding_model_params={
                      'conv_filters': 16,
                      'conv_kernel_size': 3
                  },
                  optimizer='adam',
                  optimizer_params={'lr': 0.01},
                  loss='bce',
                  loss_params={},
                  regularizer=None,
                  regularizer_params={
                      'p': 2,
                      'lambda': 1e-5
                  },
                  verbose=True,
                  low_memory=True)
    model.fit(X)
    y1 = model.predict(X_test)
    print(y1)
    save_model(model, 'model.tmp')
    # Drop the in-memory model so predictions can only come from the restore.
    del model
    try:
        model = restore_model('model.tmp')
        y2 = model.predict(X_test)
        assert np.all(y1 == y2)
    finally:
        # FIX: clean up even when the assertion fails, so a failed run
        # does not leave 'model.tmp' behind.
        os.remove('model.tmp')
def test_convkb_save_restore():
    """ConvKB must produce identical predictions after save/restore.

    Fits a tiny ConvKB model on a toy graph, predicts two unseen triples,
    round-trips the model through disk, and compares the predictions.
    """
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'y', 'a'], ['a', 'y', 'd'], ['c', 'y', 'd'],
                  ['b', 'y', 'c'], ['f', 'y', 'e']])
    X_test = np.array([['f', 'y', 'a'], ['f', 'y', 'b']])
    model = ConvKB(batches_count=1,
                   seed=22,
                   epochs=1,
                   k=10,
                   eta=1,
                   embedding_model_params={
                       'num_filters': 16,
                       'filter_sizes': [1],
                       'dropout': 0.0,
                       'is_trainable': True
                   },
                   optimizer='adam',
                   optimizer_params={'lr': 0.001},
                   loss='pairwise',
                   loss_params={},
                   verbose=True)
    model.fit(X)
    y1 = model.predict(X_test)
    save_model(model, 'convkb.tmp')
    # Drop the in-memory model so predictions can only come from the restore.
    del model
    try:
        model = restore_model('convkb.tmp')
        y2 = model.predict(X_test)
        assert np.all(y1 == y2)
    finally:
        # FIX: clean up even when the assertion fails, so a failed run
        # does not leave 'convkb.tmp' behind.
        os.remove('convkb.tmp')
""" model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10, # Use adam optimizer with learning rate 1e-3 optimizer='adam', optimizer_params={'lr': 1e-3}, # Use pairwise loss with margin 0.5 loss='pairwise', loss_params={'margin': 0.5}, # Use L2 regularizer with regularizer weight 1e-5 regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5}, # Enable stdout messages (set to false if you don't want to display) verbose=True)""" print("Training...") x_orig = load_wn18() model.fit(X_train) save_model(model, model_name_path=ke_model_path) model2 = TransE(verbose=True, k=3, epochs=40) model2.fit(X_train) save_model(model2, model_name_path=ke_model_path + '2') #filter_triples = np.concatenate((X_train, X_valid)) #filter = np.concatenate((X['train'], X['valid'], X['test'])) #ranks = evaluate_performance(X['test'], # model=model, # filter_triples=filter, # use_default_protocol=True, # corrupt subj and obj separately while evaluating # verbose=True) #mrr = mrr_score(ranks) #hits_10 = hits_at_n_score(ranks, n=10)
k=1, eta=20, optimizer='adam', optimizer_params={'lr': 1e-3}, loss='multiclass_nll', regularizer='LP', regularizer_params={ 'p': 3, 'lambda': 1e-5 }, seed=0, verbose=True) print("Training...") model.fit(X_train) save_model(model, model_name_path=ke_model_path) filter_triples = np.concatenate((X_train, X_valid)) else: model = restore_model(model_name_path=ke_model_path) from sklearn.decomposition import PCA import matplotlib.pyplot as plt import seaborn as sns from adjustText import adjust_text from incf.countryutils import transformations print("Extracting Embeddings..") id_to_name_map = { **dict(zip(df.home_team_id, df.home_team)),
# Ampligraph embedding model (train new model) ################################### model = TransE(batches_count=100, seed=555, epochs=100, k=100, loss='pairwise', optimizer='sgd', loss_params={ 'margin': 1.0, 'normalize_ent_emb': True }, verbose=True) model.fit(kg_triples.as_numpy_array()) # Save model for later usage, the it can be reloaded using load_model(os.path.join(experiment_dir,'model_transE.pkl')) save_model(model, os.path.join(out_dir, 'imdb_transE.pkl')) ##################### End ########### ALTERNATIVE ##################### ## OR ## Relaoad a pretrained model ############################# # Restore models trained using our modified restore model function ###################### # model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl')) # Get vectors # print(model.ent_to_idx.items()) # print(target_entities.get_entities()[:30]) missing = list( filter(lambda e: e not in model.ent_to_idx, target_entities.get_entities()))
loss='multiclass_nll', regularizer='LP', regularizer_params={ 'p': 3, 'lambda': 1e-5 }, verbose=True) positives_filter = X tf.logging.set_verbosity(tf.logging.ERROR) print("Model training started...") model.fit(X_train, early_stopping=False) print("Save the model...") save_model(model, model_name_path=out_embeddings_file) print("Evaluating the model...") ranks = evaluate_performance(X_test, model=model, filter_triples=positives_filter, use_default_protocol=True, verbose=True) mrr = mrr_score(ranks) print("MRR: %.2f" % (mrr)) hits_10 = hits_at_n_score(ranks, n=10) print("Hits@10: %.2f" % (hits_10)) hits_3 = hits_at_n_score(ranks, n=3) print("Hits@3: %.2f" % (hits_3)) hits_1 = hits_at_n_score(ranks, n=1)
epochs=400, #Numero de iteraciones k=100, #Dimensionalidad del grafo eta=20, optimizer='adam', optimizer_params={'lr': 1e-4}, loss='multiclass_nll', regularizer='LP', regularizer_params={'p': 3, 'lambda': 1e-5}, seed=0, verbose=True) tf.logging.set_verbosity(tf.logging.ERROR) model.fit(X_train) save_model(model, model_name_path="../Data/KGEmbedModel.pkl") #Evaluar modelo filter_triples = np.concatenate((X_train, X_valid)) ranks = evaluate_performance(X_valid, model=model, filter_triples=filter_triples, use_default_protocol=True, verbose=True, filter_unseen=True) mr = mr_score(ranks) mrr = mrr_score(ranks) print("MRR: %.2f" % (mrr)) print("MR: %.2f" % (mr)) hits_10 = hits_at_n_score(ranks, n=10) print("Hits@10: %.2f" % (hits_10)) hits_3 = hits_at_n_score(ranks, n=3) print("Hits@3: %.2f" % (hits_3))