Beispiel #1
0
# connect to DB storing the dataset
# NOTE(review): `dataset_name` is not assigned before this point in the visible
# excerpt — it must already be defined when this line runs.
from setup import Mongo_Connector, load_embeddings, IndexSearch
mongo = Mongo_Connector('kbqa', dataset_name)

# entity and predicate catalogs
# (index names presumably refer to the DBpedia 2016-04 entity/predicate
# indexes — confirm against the `setup` module)
e_index = IndexSearch('dbpedia201604e')
p_index = IndexSearch('dbpedia201604p')

# path to KG relations
# NOTE(review): `hdt_path` is an absolute path inside one user's home
# directory; it has to be adjusted before running on another machine.
from hdt import HDTDocument
hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"
hdt_file = 'dbpedia2016-04en.hdt'
namespace = "http://dbpedia.org/"

# word embeddings selected by `embeddings_choice`
# NOTE(review): `embeddings_choice` is not assigned in the visible excerpt.
word_vectors = load_embeddings(embeddings_choice)

from collections import defaultdict

import numpy as np
import scipy.sparse as sp

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, Bidirectional, TimeDistributed
# NOTE(review): wildcard import — every public name of keras.optimizers is
# pulled into this namespace and may shadow names defined above.
from keras.optimizers import *
from keras.preprocessing.text import text_to_word_sequence

# load pre-trained Q type network
# (only the model name is set here; the loading code itself is not part of
# the visible excerpt)
modelname = 'qtype'

                # store the activation values per answer id
                # NOTE(review): the membership test reads `activations1` while the
                # accumulation writes to `activations` — confirm that both names
                # are intended and that this is not a typo.
                for i, e in enumerate(entities):
                    if e in activations1:
                        activations[e] += y[i]


# hold average stats for the model performance over the samples
from collections import Counter

# run configuration
# (presumably `verbose` toggles extra output and `limit` caps the number of
# processed samples, with None meaning no cap — TODO confirm against the code
# that consumes them)
verbose = False
limit = None

# labels of the question types used by this script
question_types = ['SELECT', 'ASK', 'COUNT']

# embeddings
# NOTE(review): `word_vectors` is assigned from the same
# `load_embeddings(embeddings_choice)` call earlier in this script; if
# `embeddings_choice` is unchanged in between, this second load is redundant —
# confirm before removing.
word_vectors = load_embeddings(embeddings_choice)
p_vectors = load_embeddings('fasttext_p_labels')

# String ids collected under `errors_1`, listed in ascending numeric order
# (presumably ids of samples that failed at one stage of the pipeline —
# TODO confirm against the code that consumes this list).
# The ids are written as one whitespace-separated block and split into a list
# of strings.
errors_1 = """
    67 138 392 467 563 581 601 723 741 785 920
    951 952 1029 1070 1140 1142 1149 1219 1253
    1325 1461 1485 1620 1626 1640 1669 1680 1687
    1762 1866 1918 2039 2191 2205 2395 2398 2455
    2547 2557 2569 2613 2732 2739 2745 2833 2854
    2872 2873 2983 3142 3267 3282 3288 3295 3450
    3458 3466 3487 3508 3738 3757 3767 3792 3799
    3813 3824 3864 3944 3975 4034 4068 4125 4139
    4172 4219 4339 4352 4418 4465 4466 4486 4487
    4489 4566 4683 4703 4724 4729 4732 4739
""".split()
errors_e = [
Beispiel #3
0
'''
Created on Jan 8, 2018

.. codeauthor:: svitlana vakulenko
    <*****@*****.**>

Evaluate entity linking performance and store annotations
'''

# setup
# name of the dataset; passed to Mongo_Connector below
dataset_name = 'lcquad'

import os
from setup import IndexSearch, Mongo_Connector, load_embeddings

# entity-label embeddings, entity index and DB connection
# (semantics of the string identifiers are defined in the `setup` module,
# which is not visible here)
e_vectors = load_embeddings('fasttext_e_labels')
e_index = IndexSearch('dbpedia201604e')
mongo = Mongo_Connector('kbqa', dataset_name)

# match and save matched entity URIs to MongoDB
# (`loaded` starts as False; the code that reads or flips it is not part of
# the visible excerpt)
loaded = False

# presumably caps the number of processed samples, None meaning no cap —
# TODO confirm
limit = None
string_cutoff = 50  # maximum number of candidate entities per mention
# NOTE(review): the three limits below are not documented in the source;
# judging by their names they presumably bound the semantically ranked
# candidates, the node degree and the number of retrieved triples — confirm
# against the code that uses them.
semantic_cutoff = 1000
max_degree = 50000
max_triples = 10000

# path to KG relations
# NOTE(review): `hdt_path` is an absolute path inside one user's home
# directory; it has to be adjusted before running on another machine.
from hdt import HDTDocument
hdt_path = "/home/zola/Projects/hdt-cpp-molecules/libhdt/data/"