def __init__(self, ques_src, running_dir, sub1_cands_dir, sub1_openie_dir,
             sub2_openie_dir, sparql_host="141.212.110.80", sparql_port="3093"):
    self.ques_src = ques_src
    self.running_dir = running_dir
    self.sub1_flat_file_path = os.path.join(running_dir, "sub1_lookup.csv")
    self.sub1_cands_dir = sub1_cands_dir
    self.sub2_cands_dir = os.path.join(running_dir, "sub2_cands")
    self.sub1_openie_dir = sub1_openie_dir
    self.sub2_openie_dir = sub2_openie_dir
    self.questions_dict = {}
    self.sparql = sparqlUtils(host=sparql_host, port=sparql_port)
    questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))
    # Keep only the multi-hop questions (composition and conjunction types).
    for q in questions:
        if q["compositionality_type"] in ("composition", "conjunction"):
            self.questions_dict[q["ID"]] = q
    self.test_index = pd.read_csv(self.sub1_flat_file_path, sep=',')
    self.test_index['index'] = self.test_index['index'].astype(int)
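# Example usage, a minimal sketch only: the enclosing class name
# (Sub2Generator) and every path below are hypothetical placeholders, not
# values taken from the repo.
#
#   generator = Sub2Generator(
#       ques_src="ComplexWebQuestions_test.json",
#       running_dir="runs/cwq_test",
#       sub1_cands_dir="runs/cwq_test/sub1_cands",
#       sub1_openie_dir="runs/cwq_test/sub1_openie",
#       sub2_openie_dir="runs/cwq_test/sub2_openie")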
def __init__(self, ques_src, sub1_flat_file_path, sub1_cands_dir, sub2_cands_dir):
    self.complexqEndpoint = ComplexQuestionEndPoint()
    self.corechainGen = CoreChainGen()
    self.queryGraphGen = QueryGraphGen()
    self.ques_src = ques_src
    self.sub1_flat_file_path = sub1_flat_file_path
    self.sub1_cands_dir = sub1_cands_dir
    self.sub2_cands_dir = sub2_cands_dir
    # FIFO-bounded cache of forward-path lookups.
    self.forward_cache = {}
    self.forward_cache_fifo = []
    self.MAX_SIZE = 10000
    self.questions_dict = {}
    self.sparql = sparqlUtils()
    questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))
    for q in questions:
        self.questions_dict[q["ID"]] = q
    # ${r} is substituted with the query's triple patterns, ${f} with filters.
    self.template = Template('''
        PREFIX ns: <http://rdf.freebase.com/ns/>
        SELECT DISTINCT ?x
        WHERE {
            ${r}
            ?x ns:type.object.name ?name .
            ${f}
        }
        ''')
    self.test_index = pd.read_csv(sub1_flat_file_path, sep=',')
    self.test_index['index'] = self.test_index['index'].astype(int)
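# A minimal sketch of how a string.Template like the one above gets filled
# in. The triple patterns and the filter below are illustrative placeholders,
# not candidates produced by the repo:
from string import Template

demo_template = Template('''
    PREFIX ns: <http://rdf.freebase.com/ns/>
    SELECT DISTINCT ?x
    WHERE {
        ${r}
        ?x ns:type.object.name ?name .
        ${f}
    }
    ''')
demo_query = demo_template.substitute(
    r='ns:m.02mjmr ns:people.person.spouse_s ?cvt . '
      '?cvt ns:people.marriage.spouse ?x .',
    f='FILTER (?x != ns:m.02mjmr)')
print demo_query  # a complete SPARQL query, ready to send to the endpoint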
def __init__(self):
    self.relations_to_filter = set(pd.read_csv(RELATIONS_FILTER, names=['rel']).rel)
    self.validate_subgraph = False
    self.sparql = sparqlUtils()
    self.entity_cache = {}
    self.is_bounded = True
def __init__(self):
    self.entity_linking_path = os.path.join(
        SMART_DATA_DIR,
        'webquestions.examples.%s.e2e.top10.filter.tsv' % SPLIT)
    self.q_links_dict = self.load_webq_linking_data()
    self.sparql = sparqlUtils()
    self.type_dict = {}
    self.type_name_dict = {}
def __init__(self):
    self.sparql = sparqlUtils()
    self.cache_maxsize = 10000
    self.cvt_constraints_cache = {}
    self.cvt_constraints_cache_elements_fifo = []
    self.topic_entity_dict = {}
    self.type_dict = {}
    self.type_name_dict = {}
def __init__(self):
    self.sparql = sparqlUtils()
    self.topic_entity_dict = {}
    self.cache_maxsize = 10000
    self.cvt_constraints_cache = {}
    self.cvt_constraints_cache_elements_fifo = []
    self.type_dict = {}
    self.type_name_dict = {}
    self.all_path_entity_cache = {}
    self.stopwords = set(stopwords.words('english'))
    self.lemmatizer = WordNetLemmatizer()
def __init__(self):
    self.sparql = sparqlUtils()
    self.cache_maxsize = 10000
    self.corechainGen = CoreChainGen()
    self.queryGraphGen = QueryGraphGen()
    self.connecting_path_entity_cache = {}
    self.path_entity_cache_elements_fifo = []
    self.cvt_constraints_cache = {}
    self.cvt_constraints_cache_elements_fifo = []
    self.type_dict = {}
    self.type_name_dict = {}
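# Several of the classes above pair a dict cache with a *_fifo list and a
# cache_maxsize bound. A minimal sketch of the eviction pattern this implies;
# the helper name _cache_put is an assumption, and the repo may inline this
# logic instead:
def _cache_put(self, cache, fifo, key, value):
    # Simple FIFO eviction (not LRU): once the bound is reached, drop the
    # earliest-inserted key before admitting a new one.
    if key not in cache and len(fifo) >= self.cache_maxsize:
        oldest = fifo.pop(0)
        cache.pop(oldest, None)
    if key not in cache:
        fifo.append(key)
    cache[key] = value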
def __init__(self, ques_src, lookup_path, kb_cands_dir, openie_cands_dir):
    self.sparql = sparqlUtils()
    self.ques_src = ques_src
    self.kb_cands_dir = kb_cands_dir
    self.lookup = pd.read_csv(lookup_path, sep=',')
    self.lookup['index'] = self.lookup['index'].astype(int)
    self.openie_cands_dir = openie_cands_dir
    self.questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))["Questions"]
    # Map question id -> topic entity -> list of gold answer MIDs/values.
    self.ground_ans_dict = {}
    for q in self.questions:
        ques_id = q["QuestionId"]
        parses = q.get("Parses", [])
        entity_ans_dict = {}
        for parse in parses:
            topic_entity = parse["TopicEntityMid"]
            answers = parse.get("Answers", [])
            entity_ans_dict[topic_entity] = [a["AnswerArgument"] for a in answers]
        self.ground_ans_dict[ques_id] = entity_ans_dict
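# A sketch of how ground_ans_dict supports per-parse scoring. This F1 helper
# is illustrative only -- the function name and metric details are
# assumptions, not the repo's evaluation code:
def f1_for_parse(ground_ans_dict, ques_id, topic_entity, predicted):
    # Compare a predicted answer set against the gold answers recorded for
    # one (question, topic entity) parse.
    gold = set(ground_ans_dict.get(ques_id, {}).get(topic_entity, []))
    pred = set(predicted)
    if not gold or not pred:
        return 0.0
    precision = len(gold & pred) / float(len(pred))
    recall = len(gold & pred) / float(len(gold))
    if precision + recall == 0.0:
        return 0.0
    return 2 * precision * recall / (precision + recall)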
def check_shortestPath(raw_questions):
    SPARQL = sparqlUtils()
    max_len = 0
    lens = []
    no_path = 0
    no_path_query = open(os.path.join(INPUT_PREFIX, 'no_path_query.txt'), 'w+')
    for raw_question in raw_questions:
        curr_len = -1
        parses = raw_question[u"Parses"]
        for parse in parses:
            topic_entity = parse[u"TopicEntityMid"]
            answer_entities = []
            answers = parse[u"Answers"]
            for answer in answers:
                answer_entities.append(answer[u"AnswerArgument"])
            for answer_entity in answer_entities:
                # Non-MID answers are date values; wrap them as typed SPARQL
                # literals of the form "..."^^xsd:dateTime.
                if answer_entity[0:2] != 'm.':
                    answer_entity = '"' + answer_entity + '"^^xsd:dateTime'
                try:
                    this_len = SPARQL.shortestPathLength(topic_entity, answer_entity)
                    curr_len = max(this_len, curr_len)
                except Exception:
                    continue
        max_len = max(max_len, curr_len)
        if curr_len == -1:
            # No path found for any parse; record the question for inspection.
            no_path_query.write(raw_question[u"QuestionId"] + '\n')
            no_path += 1
        else:
            lens.append(curr_len)
    no_path_query.close()
    print "There are {} questions in total".format(len(raw_questions))
    print "Maximum path is {}".format(max_len)
    print "Mean path is: {}, Median path is: {}".format(
        np.mean(np.array(lens)), np.median(np.array(lens)))
    print "No path: {}".format(no_path)
from scipy.spatial import distance
import os
import codecs
import heapq
import json
import pickle
import logging
import math
import nltk
from kbEndPoint.utils.sparql import sparqlUtils

logging.basicConfig(filename="relation_semantics.log", level=logging.ERROR)

WEBQSP_TEST = "../../datasets/WebQSP/data/WebQSP.test.json"
GLOVE_FILE = "/home/xinyi/NLP_Resources/glove.6B/glove.6B.300d.txt"
GLOVE_PICKLE = "Glove.pickle"
WORD2VEC = {}
QUSTSIONWORDS = {'what', 'who', 'how', 'where', 'when'}
ENCODING = 'ISO-8859-1'
SPARQL = sparqlUtils()
VALID_WORD = {}
STOPWORDS = set(nltk.corpus.stopwords.words('english'))


def load_WebQSP():
    with open(WEBQSP_TEST) as file:
        raw_questions = json.load(file)["Questions"]
    return raw_questions


def loadGlove():
    global VALID_WORD
    global WORD2VEC
    if os.path.exists(GLOVE_PICKLE):
        # The original source was truncated at this branch; loading the cached
        # pickle is the natural completion given GLOVE_PICKLE above (an
        # assumption, not recovered code).
        with open(GLOVE_PICKLE, 'rb') as f:
            WORD2VEC = pickle.load(f)
    else:
        # Parse the raw GloVe text file and cache the vectors for next time.
        with codecs.open(GLOVE_FILE, 'r', encoding=ENCODING) as f:
            for line in f:
                parts = line.rstrip().split(' ')
                WORD2VEC[parts[0]] = [float(v) for v in parts[1:]]
        with open(GLOVE_PICKLE, 'wb') as f:
            pickle.dump(WORD2VEC, f)
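# scipy.spatial.distance is imported above for vector similarity. A minimal
# sketch of comparing two phrases with the loaded GloVe vectors; the function
# name and the mean-vector scheme are assumptions, not necessarily the repo's
# exact method:
def phrase_similarity(phrase_a, phrase_b):
    def mean_vector(phrase):
        # Average the GloVe vectors of in-vocabulary, non-stopword tokens.
        vecs = [WORD2VEC[w] for w in phrase.lower().split()
                if w not in STOPWORDS and w in WORD2VEC]
        if not vecs:
            return None
        return [sum(dim) / float(len(vecs)) for dim in zip(*vecs)]

    va, vb = mean_vector(phrase_a), mean_vector(phrase_b)
    if va is None or vb is None:
        return 0.0
    # distance.cosine returns a distance in [0, 2]; convert to similarity.
    return 1.0 - distance.cosine(va, vb)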
import json
from kbEndPoint.utils.sparql import sparqlUtils

JSON_PATH = "/Users/funke/rel_constraint_processed_test.json"
JSON_DEST_PATH = "/Users/funke/rel_constraint_processed_entities_test.json"

if __name__ == '__main__':
    sparql = sparqlUtils()
    ques_json = json.load(open(JSON_DEST_PATH, 'r'))
    # Count the questions whose topic entities resolved to no types.
    no_topic = 0
    for q in ques_json:
        print q['QuestionId']
        print q['TopicTypes']
        if len(q['TopicTypes']) == 0:
            no_topic += 1
    print no_topic

    # ques_json = json.load(open(JSON_PATH, 'r'))
    # for q in ques_json:
    #     print q['QuestionId']
    #     topics = q['Topics']
    #     topic_types_dict = {}
    #     for topic in topics:
    #         types = sparql.get_entity_types(topic)
    #         if types is None:
    #             continue
    #         topic_types_dict[topic] = types
    #     q['TopicTypes'] = topic_types_dict
    # json.dump(ques_json, open(JSON_DEST_PATH, 'w+'), indent=4)
def __init__(self):
    self.sparql = sparqlUtils()
    self.all_path_entity_cache = {}
def __init__(self):
    self.sparql = sparqlUtils()
    self.entity_names_cache = {}
    self.linker = EL_helper()
def __init__(self):
    self.sparql = sparqlUtils()
    self.stopwords = set(stopwords.words('english'))
    self.lemmatizer = WordNetLemmatizer()
def __init__(self):
    self.sparql = sparqlUtils()