Code example #1
    def __init__(self,
                 ques_src,
                 running_dir,
                 sub1_cands_dir,
                 sub1_openie_dir,
                 sub2_openie_dir,
                 sparql_host="141.212.110.80",
                 sparql_port="3093"):
        self.ques_src = ques_src
        self.running_dir = running_dir
        self.sub1_flat_file_path = os.path.join(running_dir, "sub1_lookup.csv")
        self.sub1_cands_dir = sub1_cands_dir
        self.sub2_cands_dir = os.path.join(running_dir, "sub2_cands")
        self.sub1_openie_dir = sub1_openie_dir
        self.sub2_openie_dir = sub2_openie_dir

        self.questions_dict = {}
        self.sparql = sparqlUtils(host=sparql_host, port=sparql_port)
        questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))

        # Keep only the compositional question types handled downstream.
        for q in questions:
            if q["compositionality_type"] in ("composition", "conjunction"):
                self.questions_dict[q["ID"]] = q

        self.test_index = pd.read_csv(self.sub1_flat_file_path, sep=',')
        self.test_index['index'] = self.test_index['index'].astype(int)
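Both this constructor and its counterpart in example #2 load sub1_lookup.csv into test_index and force the index column to int before any lookups. A minimal sketch of the kind of row lookup such a table supports; only the index column is shown in the excerpts, so any other columns are assumed to ride along with the match:

import pandas as pd

test_index = pd.read_csv("sub1_lookup.csv", sep=',')
test_index['index'] = test_index['index'].astype(int)

# Select the rows for one candidate index (42 is an arbitrary example).
rows = test_index[test_index['index'] == 42]
print(rows)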
Code example #2
    def __init__(self, ques_src, sub1_flat_file_path, sub1_cands_dir, sub2_cands_dir):
        self.complexqEndpoint = ComplexQuestionEndPoint()
        self.corechainGen = CoreChainGen()
        self.queryGraphGen = QueryGraphGen()

        self.ques_src = ques_src
        self.sub1_flat_file_path = sub1_flat_file_path
        self.sub1_cands_dir = sub1_cands_dir
        self.sub2_cands_dir = sub2_cands_dir
        self.forward_cache = {}       # cached forward-query results
        self.forward_cache_fifo = []  # insertion order, for FIFO eviction
        self.MAX_SIZE = 10000         # cache capacity bound
        self.questions_dict = {}
        self.sparql = sparqlUtils()
        questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))

        for q in questions:
            self.questions_dict[q["ID"]] = q

        self.template = Template('''
            PREFIX ns: <http://rdf.freebase.com/ns/>
            SELECT DISTINCT ?x
            WHERE {
                ${r}
                ?x ns:type.object.name ?name .
                ${f}
            }
            ''')

        self.test_index = pd.read_csv(sub1_flat_file_path, sep=',')
        self.test_index['index'] = self.test_index['index'].astype(int)
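The ${r} and ${f} placeholders in the SPARQL template above are filled with Python's standard string.Template substitution. A minimal sketch of instantiating such a template; the core-chain pattern and filter clause are hypothetical stand-ins for what the candidate-generation code would produce:

from string import Template

template = Template('''
    PREFIX ns: <http://rdf.freebase.com/ns/>
    SELECT DISTINCT ?x
    WHERE {
        ${r}
        ?x ns:type.object.name ?name .
        ${f}
    }
    ''')

# Hypothetical values, for illustration only.
query = template.substitute(
    r='ns:m.0f8l9c ns:location.location.contains ?x .',
    f='FILTER (?x != ns:m.0f8l9c)')
print(query)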
Code example #3
 def __init__(self):
     self.relations_to_filter = set(
         pd.read_csv(RELATIONS_FILTER, names=['rel']).rel)
     self.validate_subgraph = False
     self.sparql = sparqlUtils()
     self.entity_cache = {}
     self.is_bounded = True
Code example #4
 def __init__(self):
     self.entity_linking_path = os.path.join(
         SMART_DATA_DIR,
         'webquestions.examples.%s.e2e.top10.filter.tsv' % (SPLIT))
     self.q_links_dict = self.load_webq_linking_data()
     self.sparql = sparqlUtils()
     self.type_dict = {}
     self.type_name_dict = {}
Code example #5
 def __init__(self):
     self.sparql = sparqlUtils()
     self.cache_maxsize = 10000
     self.cvt_constraints_cache = {}
     self.cvt_constraints_cache_elements_fifo = []
     self.topic_entity_dict = {}
     self.type_dict = {}
     self.type_name_dict = {}
Code example #6
 def __init__(self):
     self.sparql = sparqlUtils()
     self.topic_entity_dict = {}
     self.cache_maxsize = 10000
     self.cvt_constraints_cache = {}
     self.cvt_constraints_cache_elements_fifo = []
     self.type_dict = {}
     self.type_name_dict = {}
     self.all_path_entity_cache = {}
     self.stopwords = set(stopwords.words('english'))
     self.lemmatizer = WordNetLemmatizer()
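Examples #6 and #14 both hold an English stopword set and a WordNetLemmatizer, the usual NLTK setup for normalizing question or relation tokens before matching. A minimal sketch of how these two fields are typically used together; the whitespace tokenization is an assumed simplification, not taken from the excerpts:

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Requires nltk.download('stopwords') and nltk.download('wordnet').
stop = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def normalize_tokens(text):
    # Lowercase, split on whitespace, drop stopwords, lemmatize the rest.
    return [lemmatizer.lemmatize(tok)
            for tok in text.lower().split()
            if tok not in stop]

print(normalize_tokens("What countries does the Nile flow through"))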
Code example #7
    def __init__(self):
        self.sparql = sparqlUtils()
        self.cache_maxsize = 10000

        self.corechainGen = CoreChainGen()
        self.queryGraphGen = QueryGraphGen()

        self.connecting_path_entity_cache = {}
        self.path_entity_cache_elements_fifo = []

        self.cvt_constraints_cache = {}
        self.cvt_constraints_cache_elements_fifo = []

        self.type_dict = {}
        self.type_name_dict = {}
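Several of these constructors (#2, #5, #6, #7) pair a dict cache with a companion FIFO list and a size bound. The eviction code itself is not part of these excerpts, but the field names imply the usual pattern: record insertion order in the list and drop the oldest key once the bound is exceeded. A minimal sketch under that assumption; the cache_put helper is hypothetical:

cache_maxsize = 10000
cvt_constraints_cache = {}
cvt_constraints_cache_elements_fifo = []

def cache_put(key, value):
    # Evict the oldest entry once the cache reaches its size bound.
    if key not in cvt_constraints_cache:
        cvt_constraints_cache_elements_fifo.append(key)
        if len(cvt_constraints_cache_elements_fifo) > cache_maxsize:
            oldest = cvt_constraints_cache_elements_fifo.pop(0)
            cvt_constraints_cache.pop(oldest, None)
    cvt_constraints_cache[key] = value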
Code example #8
 def __init__(self, ques_src, lookup_path, kb_cands_dir, openie_cands_dir):
     self.sparql = sparqlUtils()
     self.ques_src = ques_src
     self.kb_cands_dir = kb_cands_dir
     self.lookup = pd.read_csv(lookup_path, sep=',')
     self.lookup['index'] = self.lookup['index'].astype(int)
     self.openie_cands_dir = openie_cands_dir
     self.questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))["Questions"]
     self.ground_ans_dict = {}
     # Map each question ID to its gold answers, grouped by topic entity MID.
     for q in self.questions:
         ques_id = q["QuestionId"]
         parses = q.get("Parses", [])
         entity_ans_dict = {}
         for parse in parses:
             topic_entity = parse["TopicEntityMid"]
             answers = parse.get("Answers", [])
             entity_ans_dict[topic_entity] = [a["AnswerArgument"] for a in answers]
         self.ground_ans_dict[ques_id] = entity_ans_dict
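ground_ans_dict maps each question ID to a dict from topic-entity MID to that parse's gold answer MIDs. A sketch of how such a structure is typically consulted at evaluation time; the IDs and predictions below are made up for illustration:

# Stand-in for self.ground_ans_dict, with invented IDs.
ground_ans_dict = {"WebQTest-12": {"m.0d05w3": ["m.0d05w3", "m.02k_4z"]}}
predicted = ["m.0d05w3", "m.01x73"]

gold = set(ground_ans_dict["WebQTest-12"]["m.0d05w3"])
pred = set(predicted)
precision = len(gold & pred) / len(pred) if pred else 0.0
recall = len(gold & pred) / len(gold) if gold else 0.0
f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
print(precision, recall, f1)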
Code example #9
def check_shortestPath(raw_questions):
    SPARQL = sparqlUtils()
    max_len = 0
    lens = []
    no_path = 0
    no_path_query = open(os.path.join(INPUT_PREFIX, 'no_path_query.txt'), 'w+')
    for raw_question in raw_questions:
        curr_len = -1
        parses = raw_question[u"Parses"]
        for parse in parses:
            topic_entity = parse[u"TopicEntityMid"]
            answer_entities = []
            answers = parse[u"Answers"]
            for answer in answers:
                answer_entities.append(answer[u"AnswerArgument"])
            for answer_entity in answer_entities:
                # Dates and other non-MID answers must be passed to SPARQL
                # as typed literals rather than bare strings.
                if answer_entity[0:2] != 'm.':
                    answer_entity = '"' + answer_entity + '"^^xsd:dateTime'
                try:
                    this_len = SPARQL.shortestPathLength(
                        topic_entity, answer_entity)
                    curr_len = max(this_len, curr_len)
                except Exception:
                    continue

        max_len = max(max_len, curr_len)
        if curr_len == -1:
            no_path_query.write(raw_question[u"QuestionId"] + '\n')
            no_path += 1
        else:
            lens.append(curr_len)
    print "There are {} questions in total".format(len(raw_questions))
    print "Maximum path is {}".format(max_len)
    print "Mean path is: {}, Median path is: {}".format(
        np.mean(np.array(lens)), np.median(np.array(lens)))
    print "No path: {}".format(no_path)
Code example #10
from scipy.spatial import distance
import os
import json
import heapq
import pickle
import logging
import math
import nltk
from kbEndPoint.utils.sparql import sparqlUtils

logging.basicConfig(filename="relation_semantics.log", level=logging.ERROR)
WEBQSP_TEST = "../../datasets/WebQSP/data/WebQSP.test.json"
GLOVE_FILE = "/home/xinyi/NLP_Resources/glove.6B/glove.6B.300d.txt"
GLOVE_PICKLE = "Glove.pickle"
WORD2VEC = {}
QUSTSIONWORDS = {'what', 'who', 'how', 'where', 'when'}
ENCODING = 'ISO-8859-1'
SPARQL = sparqlUtils()
VALID_WORD = {}
STOPWORDS = set(nltk.corpus.stopwords.words('english'))


def load_WebQSP():
    raw_questions = []
    with open(WEBQSP_TEST) as file:
        raw_questions = json.load(file)["Questions"]
    return raw_questions


def loadGlove():
    global VALID_WORD
    global WORD2VEC
    if os.path.exists(GLOVE_PICKLE):
Code example #11
import json
from kbEndPoint.utils.sparql import sparqlUtils

JSON_PATH = "/Users/funke/rel_constraint_processed_test.json"
JSON_DEST_PATH = "/Users/funke/rel_constraint_processed_entities_test.json"

if __name__ == '__main__':
    sparql = sparqlUtils()
    ques_json = json.load(open(JSON_DEST_PATH, 'r'))
    no_topic = 0
    for q in ques_json:
        print(q['QuestionId'])
        print(q['TopicTypes'])
        if len(q['TopicTypes']) == 0:
            no_topic += 1
    print(no_topic)
    # ques_json = json.load(open(JSON_PATH, 'r'))
    # for q in ques_json:
    #     print(q['QuestionId'])
    #     topics = q['Topics']
    #     topic_types_dict = {}
    #     for topic in topics:
    #         types = sparql.get_entity_types(topic)
    #         if types is None:
    #             continue
    #         topic_types_dict[topic] = types
    #     q['TopicTypes'] = topic_types_dict
    # json.dump(ques_json, open(JSON_DEST_PATH, 'w+'), indent=4)
Code example #12
 def __init__(self):
     self.sparql = sparqlUtils()
     self.all_path_entity_cache = {}
Code example #13
 def __init__(self):
     self.sparql = sparqlUtils()
     self.entity_names_cache = {}
     self.linker = EL_helper()
Code example #14
 def __init__(self):
     self.sparql = sparqlUtils()
     self.stopwords = set(stopwords.words('english'))
     self.lemmatizer = WordNetLemmatizer()
Code example #15
 def __init__(self):
     self.sparql = sparqlUtils()