'''
.. codeauthor: svitlana vakulenko <*****@*****.**>

Evaluate entity linking performance and store annotations
'''
import os

from hdt import HDTDocument

from setup import IndexSearch, Mongo_Connector, load_embeddings

# setup
dataset_name = 'lcquad'

# Resources loaded at import time: entity-label embeddings, the entity
# index, and the MongoDB collection for the selected dataset.
e_vectors = load_embeddings('fasttext_e_labels')
e_index = IndexSearch('dbpedia201604e')
mongo = Mongo_Connector('kbqa', dataset_name)

# match and save matched entity URIs to MongoDB
loaded = False
limit = None
string_cutoff = 50  # maximum number of candidate entities per mention
semantic_cutoff = 1000
max_degree = 50000
max_triples = 10000

# path to KG relations
hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
hdt_file = 'dbpedia2016-04en.hdt'
namespace = "http://dbpedia.org/"
'''
Created on Feb 20, 2018

.. codeauthor: svitlana vakulenko <*****@*****.**>

Final evaluation script for comparison with the benchmark
'''
import time
from collections import defaultdict

import numpy as np
import scipy.sparse as sp
from hdt import HDTDocument

from setup import Mongo_Connector, load_embeddings, IndexSearch

# setup
dataset_name = 'lcquad'
embeddings_choice = 'glove840B300d'

# connect to DB storing the dataset
mongo = Mongo_Connector('kbqa', dataset_name)

# path to KG relations
hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
hdt_file = 'dbpedia2016-04en.hdt'
namespace = "predef-dbpedia2016-04"

# entity and predicate catalogs
e_index = IndexSearch('dbpedia201604e')
'''
Created on Feb 20, 2018

.. codeauthor: svitlana vakulenko <*****@*****.**>

Final evaluation script for comparison with the benchmark
'''
import time
from collections import defaultdict

import numpy as np
import scipy.sparse as sp
from hdt import HDTDocument

from setup import Mongo_Connector, load_embeddings, IndexSearch

# setup
dataset_name = 'lcquad'
embeddings_choice = 'glove840B300d'

# connect to DB storing the dataset
mongo = Mongo_Connector('kbqa', dataset_name)

# path to KG relations
# Alternative location (disabled in the original script):
#   "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
hdt_path = "/mnt/ssd/sv/"
hdt_file = 'dbpedia2016-04en.hdt'
# Alternative namespace (disabled in the original script):
#   "http://dbpedia.org/"
namespace = "predef-dbpedia2016-04"

# entity and predicate catalogs