Exemple #1
0
def predict_missing_links(train_file_path, evaluation_file_path, model_path,
                          tensorboard_visualizations_path):
    """Run the link-prediction workflow end to end and print its results.

    The graph is loaded and split into train and test subsets. A model is
    restored from ``model_path`` when that file exists; otherwise a new one
    is trained on the train subset and saved there. The model's metrics and
    a summary of the scored evaluation samples are printed to stdout.

    Args:
        train_file_path: Comma-separated file holding the graph; handed to
            ``load_from_csv`` with ``'.'`` as its first argument.
        evaluation_file_path: Comma-separated file holding the samples to
            score; loaded the same way.
        model_path: File the model is restored from if it exists, or saved
            to after training if it does not.
        tensorboard_visualizations_path: Directory for the TensorBoard
            visualizations. A falsy value skips that step entirely.
    """
    triples = load_from_csv('.', train_file_path, sep=',')
    candidates = load_from_csv('.', evaluation_file_path, sep=',')

    print('Head of the loaded graph: ')
    print(triples[:5])

    train_samples, test_samples = split(triples)
    print(
        f'Divided into train and test subsets with shapes {train_samples.shape} and {test_samples.shape} respectively.'
    )

    # Reuse a previously saved model when one is available; train and
    # persist a fresh TransE model otherwise.
    if os.path.isfile(model_path):
        model = restore_model(model_path)
    else:
        model = train_transe(train_samples)
        save_model(model, model_path)

    # Metrics are computed before the table header is printed so that any
    # output produced while computing them stays above the table.
    metrics = compute_metrics(model, train_samples, test_samples)
    print(f'{"metric":10s}: {"score":5s}')
    for metric, score in metrics.items():
        print(f'{metric:10s}: {score:<5.2f}')

    scores, ranks = score_samples(model, candidates, train_samples)
    print(summarize(scores, candidates, ranks))

    if not tensorboard_visualizations_path:
        return

    os.makedirs(tensorboard_visualizations_path, exist_ok=True)
    create_tensorboard_visualizations(model, tensorboard_visualizations_path)
Exemple #2
0
def test_create_tensorboard_visualizations():
    """Smoke-test the TensorBoard export on a tiny fitted TransE model.

    There are no assertions: the test passes as long as fitting the model
    and calling ``create_tensorboard_visualizations`` do not raise, which
    checks that the TensorFlow APIs involved are still operative.
    """
    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'y', 'a'],
        ['a', 'y', 'd'],
        ['c', 'y', 'd'],
        ['b', 'y', 'c'],
        ['f', 'y', 'e'],
    ])

    hyperparameters = {
        'batches_count': 1,
        'seed': 555,
        'epochs': 20,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
    }
    model = TransE(**hyperparameters)
    model.fit(triples)

    # Output goes to 'tensorboard_files', a path relative to the current
    # working directory.
    create_tensorboard_visualizations(model, 'tensorboard_files')
Exemple #3
0
# Build the filter used when ranking the unseen statements: stack the known
# positives on top of the unseen triples and drop duplicate rows by passing
# them through a set of tuples.
stacked_triples = np.vstack((positives_filter, X_unseen))
unique_triples = {tuple(triple) for triple in stacked_triples}
unseen_filter = np.array(list(unique_triples))

# NOTE(review): corrupt_side='s+o' together with use_default_protocol=False
# looks like subject and object corruptions are ranked jointly, not
# separately as the previous comment here stated -- confirm against the
# installed AmpliGraph version's evaluate_performance documentation.
ranks_unseen = evaluate_performance(
    X_unseen,
    model=model,
    filter_triples=unseen_filter,
    corrupt_side='s+o',
    use_default_protocol=False,
    verbose=True,
)

scores = model.predict(X_unseen)

# expit is the logistic sigmoid; it maps the raw scores into (0, 1) so they
# can be shown in the 'prob' column below.
from scipy.special import expit
probs = expit(scores)

# One row per unseen statement: the triple joined into a single string, its
# rank, its raw score and the sigmoid of that score.
statements = [' '.join(triple) for triple in X_unseen]
unseen_rows = list(zip(statements,
                       ranks_unseen,
                       np.squeeze(scores),
                       np.squeeze(probs)))
# The sorted frame is deliberately left as a bare expression (presumably a
# notebook cell output); it is not assigned to a name.
pd.DataFrame(unseen_rows,
             columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

"""---
# 7. Visualizing Embeddings with Tensorboard projector
"""

from ampligraph.utils import create_tensorboard_visualizations

# Export the model's embeddings for the TensorBoard projector into the
# 'GoT_embeddings' directory (relative to the working directory).
create_tensorboard_visualizations(model, 'GoT_embeddings')