# connect to DB storing the dataset
from setup import Mongo_Connector, load_embeddings, IndexSearch

mongo = Mongo_Connector('kbqa', dataset_name)

# entity and predicate catalogs
e_index = IndexSearch('dbpedia201604e')
p_index = IndexSearch('dbpedia201604p')

# path to KG relations
from hdt import HDTDocument

hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
hdt_file = 'dbpedia2016-04en.hdt'
namespace = "http://dbpedia.org/"

word_vectors = load_embeddings(embeddings_choice)

from collections import defaultdict

import numpy as np
import scipy.sparse as sp
from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, Bidirectional, TimeDistributed
from keras.optimizers import *
from keras.preprocessing.text import text_to_word_sequence

# load pre-trained Q type network
modelname = 'qtype'
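
# --- illustration, not part of the original script ---
# A minimal sketch of how the pre-trained question-type network named above
# could be restored, assuming it was saved with Keras' model.save(); the
# models/ directory layout and the .h5 file name are assumptions made here
# for illustration only.
from keras.models import load_model

qtype_model = load_model('models/%s.h5' % modelname)  # hypothetical path
qtype_model.summary()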
# store the activation values per answer id
for i, e in enumerate(entities):
    if e in activations1:
        activations[e] += y[i]

# hold average stats for the model performance over the samples
from collections import Counter

verbose = False
limit = None

question_types = ['SELECT', 'ASK', 'COUNT']

# embeddings
word_vectors = load_embeddings(embeddings_choice)
p_vectors = load_embeddings('fasttext_p_labels')

errors_1 = ['67', '138', '392', '467', '563', '581', '601', '723', '741', '785',
            '920', '951', '952', '1029', '1070', '1140', '1142', '1149', '1219',
            '1253', '1325', '1461', '1485', '1620', '1626', '1640', '1669',
            '1680', '1687', '1762', '1866', '1918', '2039', '2191', '2205',
            '2395', '2398', '2455', '2547', '2557', '2569', '2613', '2732',
            '2739', '2745', '2833', '2854', '2872', '2873', '2983', '3142',
            '3267', '3282', '3288', '3295', '3450', '3458', '3466', '3487',
            '3508', '3738', '3757', '3767', '3792', '3799', '3813', '3824',
            '3864', '3944', '3975', '4034', '4068', '4125', '4139', '4172',
            '4219', '4339', '4352', '4418', '4465', '4466', '4486', '4487',
            '4489', '4566', '4683', '4703', '4724', '4729', '4732', '4739']
errors_e = [
'''
Created on Jan 8, 2018

.. codeauthor: svitlana vakulenko
    <*****@*****.**>

Evaluate entity linking performance and store annotations
'''

# setup
dataset_name = 'lcquad'

import os

from setup import IndexSearch, Mongo_Connector, load_embeddings

e_vectors = load_embeddings('fasttext_e_labels')
e_index = IndexSearch('dbpedia201604e')
mongo = Mongo_Connector('kbqa', dataset_name)

# match and save matched entity URIs to MongoDB
loaded = False
limit = None

string_cutoff = 50  # maximum number of candidate entities per mention
semantic_cutoff = 1000
max_degree = 50000
max_triples = 10000

# path to KG relations
from hdt import HDTDocument

hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
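
# --- illustration, not part of the original script ---
# A minimal sketch of how the HDT file could be opened with the pyHDT
# bindings imported above and used to enforce the max_degree cutoff;
# the file name repeats the one declared in the companion script, and
# the example entity URI is hypothetical.
hdt_file = 'dbpedia2016-04en.hdt'
kg = HDTDocument(hdt_path + hdt_file)

uri = 'http://dbpedia.org/resource/Berlin'  # hypothetical candidate entity
# empty strings act as wildcards; cardinality counts matching triples
triples, cardinality = kg.search_triples(uri, '', '')
if cardinality > max_degree:
    print('skipping hub entity %s with %d outgoing triples' % (uri, cardinality))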