Esempio n. 1
0
File: webqsp.py Progetto: we1l1n/SQG
 def parse_answer(self, answer_type, raw_answer):
     """Turn one raw answer record into an ``(answer_type, value)`` pair.

     * ``"Value"``: the value is the record's ``AnswerArgument`` as is.
     * ``"Entity"``: the value is a ``Uri`` built from the knowledge base's
       shortened prefix followed by ``AnswerArgument``.
     * anything else: the value is a ``Uri`` built from ``EntityName``.
     """
     # Literal values need no URI wrapping, so handle them first.
     if answer_type == "Value":
         return answer_type, raw_answer["AnswerArgument"]

     if answer_type == "Entity":
         raw_uri = self.kb.shorten_prefix() + raw_answer["AnswerArgument"]
     else:
         raw_uri = raw_answer["EntityName"]
     return answer_type, Uri(raw_uri, self.kb.parse_uri)
Esempio n. 2
0
 def __extend_edge(self, edge, relation_uri):
     """Collect candidate edges that connect ``relation_uri`` to ``edge``.

     The knowledge base is asked (``two_hop_graph``) which patterns hold
     for the edge's two entities and the new relation.  Each returned item
     is read as ``(pattern_id, flag)``; items with a falsy flag are
     ignored.  Returns a set of new ``Edge`` objects, which is empty when
     the edge has no generic node, when either end does not yield a single
     URI, or when the knowledge base returns ``None``.
     """
     output = set()

     # The variable node is whichever end holds only generic URIs; the
     # destination takes precedence when both ends qualify.
     var_node = None
     if edge.source_node.are_all_uris_generic():
         var_node = edge.source_node
     if edge.dest_node.are_all_uris_generic():
         var_node = edge.dest_node

     ent1 = edge.source_node.first_uri_if_only()
     ent2 = edge.dest_node.first_uri_if_only()
     if var_node is None or ent1 is None or ent2 is None:
         return output

     result = self.kb.two_hop_graph(ent1, edge.uri, ent2, relation_uri)
     if result is None:
         return output

     for item in result:
         if not item[1]:
             continue
         pattern = item[0]
         if pattern == 0 or pattern == 3:
             # New node 1 points at the variable node.
             new_source = self.create_or_get_node(1, True)
             output.add(Edge(new_source, relation_uri, var_node))
         elif pattern == 1 or pattern == 2:
             # The variable node points at new node 1.
             new_dest = self.create_or_get_node(1, True)
             output.add(Edge(var_node, relation_uri, new_dest))
             if pattern == 2:
                 self.suggest_retrieve_id = 1
         elif pattern == 4:
             # The relation URI is used as a node here: nodes 0 and 1 are
             # both linked to it through the knowledge base's type URI.
             type_node = self.create_or_get_node(relation_uri)
             for node_id in (0, 1):
                 typed_node = self.create_or_get_node(node_id, True)
                 output.add(
                     Edge(typed_node,
                          Uri(self.kb.type_uri, self.kb.parse_uri),
                          type_node))
     return output
Esempio n. 3
0
 def parse_sparql(self, raw_query):
     """Normalise ``raw_query`` and collect the URIs and variables it uses.

     Every ``https://`` is rewritten to ``http://``.  Tokens of the form
     ``<...>`` and ``?name`` are then wrapped in ``Uri`` objects.

     Returns:
         ``(query, True, uris)``: the rewritten query text, a constant
         ``True`` flag, and a ``URIs`` collection of the extracted tokens.
     """
     raw_query = raw_query.replace("https://", "http://")
     # Raw string literal: "\?" is an invalid escape sequence in a normal
     # string and triggers a warning on recent Python versions.
     tokens = re.findall(r'(<[^>]*>|\?[^ ]*)', raw_query)
     uris = URIs([Uri(raw_uri, self.kb.parse_uri) for raw_uri in tokens])
     return raw_query, True, uris
Esempio n. 4
0
    def parse_sparql(self, raw_query):
        """Extract every ``<...>`` token of ``raw_query`` as a DBpedia ``Uri``.

        Returns:
            ``(raw_query, True, uris)``: the query unchanged, a constant
            ``True`` flag, and the list of extracted ``Uri`` objects.
        """
        uris = []
        for match in re.finditer('<[^>]*>', raw_query):
            uris.append(Uri(match.group(0), DBpedia.parse_uri))
        return raw_query, True, uris
Esempio n. 5
0
 def parse_answer(self, answer_type, raw_answer):
     """Turn one raw answer into a ``(type, value)`` pair.

     Boolean answers are returned as ``("boolean", str(raw_answer))``.
     For any other type the binding is looked up in ``raw_answer`` under
     ``answer_type`` or, when that key is missing, under the same name
     wrapped in double quotes.  The result is the binding's ``"type"``
     and its ``"value"`` wrapped in a ``Uri``.
     """
     if answer_type == "boolean":
         return answer_type, str(raw_answer)

     key = answer_type
     if key not in raw_answer:
         # Fall back to the key written with literal double quotes.
         key = "\"{}\"".format(key)
     binding = raw_answer[key]
     return binding["type"], Uri(binding["value"], self.kb.parse_uri)
Esempio n. 6
0
File: qald.py Progetto: we1l1n/SQG
    def parse_sparql(self, raw_query):
        """Normalise a QALD query entry and extract the URIs it mentions.

        Args:
            raw_query: either a mapping holding the query text under the
                ``"sparql"`` key, or the query text itself.  Plain text is
                only accepted when it contains "where" (case-insensitive);
                any other input is treated as an empty query.

        Returns:
            ``(query, supported, uris)``: the normalised query text (with
            ``PREFIX`` declarations expanded inline and removed), a flag
            that is ``False`` when the query contains UNION, FILTER,
            OFFSET, HAVING or LIMIT, and the list of ``Uri`` objects built
            from the ``<...>`` tokens of the normalised query.
        """
        # ``basestring`` only exists on Python 2; fall back to ``str`` so
        # the type check does not raise NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(raw_query, string_types):
            # Handle text before the mapping lookup: a query string that
            # merely contains "sparql" must not be indexed like a dict.
            if "where" not in raw_query.lower():
                raw_query = ""
        elif "sparql" in raw_query:
            raw_query = raw_query["sparql"]
        else:
            raw_query = ""

        if "PREFIX " in raw_query:
            # QALD-5 bug!
            raw_query = raw_query.replace("htp:/w.", "http://www.")
            raw_query = raw_query.replace("htp:/dbpedia.", "http://dbpedia.")

            # Expand prefixed names in place: "pfx:" becomes the opening
            # part of the declared URI ("<http://...") without the closing
            # bracket, which is appended further below.
            for item in re.findall("PREFIX [^:]*: <[^>]*>", raw_query):
                prefix = item[7:item.find(" ", 9)]
                uri = item[item.find("<"):-1]
                raw_query = raw_query.replace(prefix, uri)

            # Drop the prologue: everything up to the last ">" that occurs
            # before the WHERE keyword.
            idx = raw_query.find("WHERE")
            idx2 = raw_query[:idx - 1].rfind(">")
            raw_query = raw_query[idx2 + 1:]

            def close_bracket(match):
                token = match.group(0)
                return token if ">" in token else token + ">"

            # Close each expanded URI exactly once, in a single pass.  A
            # per-match str.replace would append ">" repeatedly for URIs
            # that occur more than once and would corrupt URIs that share
            # a common prefix.
            raw_query = re.sub('<[^ ]*', close_bracket, raw_query)

        uris = [
            Uri(raw_uri, self.kb.parse_uri)
            for raw_uri in re.findall('<[^>]*>', raw_query)
        ]
        unsupported_keywords = ("UNION", "FILTER", "OFFSET", "HAVING", "LIMIT")
        supported = not any(
            keyword in raw_query for keyword in unsupported_keywords)
        return raw_query, supported, uris
Esempio n. 7
0
def query():
    """Build candidate queries for the question in the JSON request body.

    The body must provide ``question``, ``entities`` and ``relations``;
    each entity/relation carries a ``surface`` and a list of ``uris`` with
    ``uri`` and ``confidence``.  The JSON response contains the
    post-processed queries, the question type as text, and the confidence
    of that type.
    """
    question = request.json['question']
    raw_entities = request.json['entities']
    raw_relations = request.json['relations']

    def link(raw_items):
        # Convert raw JSON items into LinkedItem objects with DBpedia URIs.
        linked = []
        for raw_item in raw_items:
            candidates = [
                Uri(candidate["uri"], DBpedia.parse_uri, candidate["confidence"])
                for candidate in raw_item["uris"]
            ]
            linked.append(LinkedItem(raw_item["surface"], candidates))
        return linked

    entities = link(raw_entities)
    relations = link(raw_relations)

    question_type, type_confidence = get_question_type(question)

    # Type 2 is a count question, type 1 an ask question.
    count_query = bool(question_type == 2)
    ask_query = not count_query and bool(question_type == 1)

    generated_queries = []
    combination_mode = True
    if not combination_mode:
        generated_queries = generate_query(question, question_type, entities, relations, count_query, ask_query)
    else:
        for comb in create_entity_relations_combinations(entities, relations):
            # Once a combination has produced queries, the remaining
            # combinations are not tried.
            if generated_queries:
                continue
            generated_queries.extend(
                generate_query(question, question_type, comb[0], comb[1], count_query, ask_query))

    queries = postprocess(generated_queries, count_query, ask_query)

    return jsonify({
        "queries": queries,
        "type": get_question_type_text(question_type),
        "type_confidence": type_confidence
    })
Esempio n. 8
0
    def __one_hop_graph(self,
                        entity_items,
                        relation_items,
                        threshold=None,
                        number_of_entities=1):
        """Add an edge for each entity/relation combination the KB reports.

        Every relation URI is paired with every combination of
        ``number_of_entities`` entity URIs and checked against
        ``self.kb.one_hop_graph``.  Each row of a non-``None`` result is
        turned into an edge through ``self.add_edge`` according to its
        integer ``m`` value:

        * ``0``: (second entity, or node ``0``) -> relation -> first entity
        * ``1``: first entity -> relation -> (second entity, or node ``0``)
        * ``2``: (second entity, or node ``0``) -> type URI -> relation URI

        Args:
            entity_items: linked entity items providing ``top_uris``.
            relation_items: linked relation items providing ``top_uris``.
            threshold: optional upper bound on the number of combinations;
                ``top_uri`` is lowered in steps of 0.1 until the count
                reported by ``count_combinations`` no longer exceeds it.
            number_of_entities: how many entity URIs form one combination.
        """
        top_uri = 1

        total = self.count_combinations(entity_items, relation_items,
                                        number_of_entities, top_uri)
        if threshold is not None:
            while total > threshold:
                top_uri -= 0.1
                total = self.count_combinations(entity_items, relation_items,
                                                number_of_entities, top_uri)

        with tqdm(total=total, disable=self.logger.level >= 10) as pbar:
            for relation_item in relation_items:
                for relation_uri in relation_item.top_uris(top_uri):
                    for entity_uris in itertools.product(
                            *[items.top_uris(top_uri)
                              for items in entity_items]):
                        for entity_uri in itertools.combinations(
                                entity_uris, number_of_entities):
                            pbar.update(1)
                            has_second = len(entity_uri) > 1
                            result = self.kb.one_hop_graph(
                                entity_uri[0], relation_uri,
                                entity_uri[1] if has_second else None)
                            # Log through the logger instead of print() so
                            # the output does not garble the progress bar.
                            self.logger.debug('result: %s', result)
                            if result is None:
                                continue

                            # Without a second entity, the other end of
                            # the edge is the node identified by 0.
                            uri = entity_uri[1] if has_second else 0
                            for item in result:
                                m = int(item["m"]["value"])
                                if m == 0:
                                    n_s = self.create_or_get_node(uri, True)
                                    n_d = self.create_or_get_node(
                                        entity_uri[0])
                                    self.add_edge(
                                        Edge(n_s, relation_uri, n_d))
                                elif m == 1:
                                    n_s = self.create_or_get_node(
                                        entity_uri[0])
                                    n_d = self.create_or_get_node(uri, True)
                                    self.add_edge(
                                        Edge(n_s, relation_uri, n_d))
                                elif m == 2:
                                    # The relation URI is used as a node
                                    # and linked via the KB's type URI.
                                    n_s = self.create_or_get_node(uri)
                                    n_d = self.create_or_get_node(
                                        relation_uri)
                                    self.add_edge(
                                        Edge(n_s,
                                             Uri(self.kb.type_uri,
                                                 self.kb.parse_uri), n_d))
Esempio n. 9
0
 def __parse(self, dataset, name, top):
     """Build ``LinkedItem`` objects from the ``name`` entries of ``dataset``.

     Each entry's ``surface`` is a ``(start_index, length)`` pair that is
     resolved against ``dataset["question"]``.  Only the first ``top``
     candidate URIs of an entry are kept.
     """

     def to_linked_item(entry):
         candidates = [
             Uri(candidate["uri"], self.parser, candidate["confidence"])
             for candidate in entry["uris"]
         ]
         offset, size = entry["surface"]
         text = dataset["question"][offset:offset + size]
         return LinkedItem(text, candidates[:top])

     return [to_linked_item(entry) for entry in dataset[name]]
Esempio n. 10
0
    def parse_answerset(self, raw_answers):
        """Convert the raw answers of one question into answer rows.

        An empty input gives an empty list.  A single raw answer is
        delegated to ``parse_queryresult``.  Otherwise each raw answer's
        ``"string"`` becomes an ``AnswerRow`` whose parser yields one
        ``"uri"`` ``Answer`` wrapping the value in a ``Uri``.
        """
        count = len(raw_answers)
        if count == 0:
            return []
        if count == 1:
            return self.parse_queryresult(raw_answers[0])

        def to_uri_answer(_answer_type, raw_value):
            return "uri", Uri(raw_value, self.kb.parse_uri)

        def to_answers(raw_row):
            return [Answer("uri", raw_row, to_uri_answer)]

        return [AnswerRow(item["string"], to_answers) for item in raw_answers]
Esempio n. 11
0
File: webqsp.py Progetto: we1l1n/SQG
    def parse_sparql(self, raw_query):
        """Reduce a WebQSP query to its WHERE body and extract its tokens.

        ``#`` comments are blanked out first.  If the query contains
        ``WHERE {``, only the text between that marker and the last ``}``
        is kept.  When the third line after the marker starts with
        ``FILTER``, the first three lines are dropped.  Newlines are
        replaced by spaces.

        Returns:
            ``(query, supported, uris)``: the reduced query text, a flag
            that is ``False`` when the text contains EXISTS, UNION or
            FILTER (case-insensitive), and the ``Uri`` objects built from
            every ``ns:...`` and ``?name`` token.
        """
        # remove comments from the sparql query; a single substitution pass
        # cannot leave residue when one comment is a prefix of another or
        # is just a bare "#"
        raw_query = re.sub(r"#[^\n]*", " ", raw_query)

        if "WHERE {" in raw_query:
            raw_query = raw_query[raw_query.find("WHERE {") + 7:]
            lines = raw_query.split("\n")
            # Guard the index: a body shorter than three lines has no
            # leading FILTER line to strip.
            if len(lines) > 2 and lines[2].startswith("FILTER"):
                raw_query = " ".join(lines[3:])
            else:
                raw_query = raw_query.replace("\n", " ")
            raw_query = raw_query[:raw_query.rfind("}")]

        uris = [
            Uri(raw_uri, Freebase.parse_uri)
            for raw_uri in re.findall(r'(ns:[^ ]*|\?[^ ]*)', raw_query)
        ]
        upper_query = raw_query.upper()
        supported = not any(
            keyword in upper_query
            for keyword in ("EXISTS", "UNION", "FILTER"))
        return raw_query, supported, uris
Esempio n. 12
0
    # Question-type classifier backed by the "svm.model" file inside the
    # classifier directory.
    question_type_classifier = SVMClassifier(
        os.path.join(question_type_classifier_path, "svm.model"))

    o = Orchestrator(None, question_type_classifier, None, parser, True)
    # Hand-written sample input: one entity with an empty surface form and
    # a single candidate URI with confidence 1.
    raw_entities = [{
        "surface":
        "",
        "uris": [{
            "confidence": 1,
            "uri": "http://dbpedia.org/resource/Bill_Finger"
        }]
    }]
    # Convert the raw dictionaries into LinkedItem objects holding Uri
    # instances parsed with the knowledge base's parser.
    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))

    raw_relations = [{
        "surface":
        "",
        "uris": [{
            "confidence": 1,
            "uri": "http://dbpedia.org/ontology/creator"
        }]
    }, {
        "surface":
        "",
        "uris": [{
Esempio n. 13
0
def generate_query():
    """Generate SPARQL queries for the question in the JSON request body.

    Required fields: ``question``, ``entities``, ``relations``.  Optional
    fields: ``force_count``, ``force_bool``, ``force_list`` (default
    ``False``), ``h1_threshold`` and ``timeout`` (default 9999999), and
    ``use_cache`` (default ``True``).

    Responses:
        * 400 when the request has no JSON body.
        * 201 with ``queries``, ``type`` and ``type_confidence`` on
          success, also when served from the cache.
        * 408 with an ``error`` message when a ``RuntimeError`` is raised.
        * 422 with an ``error`` message for any other exception, including
          an empty entity or relation list.
    """
    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    force_count_query = payload.get('force_count', False)
    force_bool_query = payload.get('force_bool', False)
    force_list_query = payload.get('force_list', False)
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload.get('h1_threshold', 9999999))
    timeout_threshold = int(payload.get('timeout', 9999999))
    use_cache = bool(payload.get('use_cache', True))

    # NOTE(review): the cache key comes from the builtin hash(); if this runs
    # on Python 3 with hash randomisation enabled, keys differ between
    # processes, so a cache saved to disk may never be hit after a restart.
    # Confirm against the deployment.
    key_fields = (question, raw_entities, raw_relations, h1_threshold,
                  force_count_query, force_bool_query, force_list_query)
    hash_key = hash(
        "".join(str(field) for field in key_fields).encode('utf-8'))

    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)

    def link(raw_items):
        # Convert raw JSON items into LinkedItem objects.
        linked = []
        for raw_item in raw_items:
            candidates = [
                Uri(candidate["uri"], kb.parse_uri, candidate["confidence"])
                for candidate in raw_item["uris"]
            ]
            linked.append(LinkedItem(raw_item["surface"], candidates))
        return linked

    entities = link(raw_entities)
    relations = link(raw_relations)

    try:
        if not entities or not relations:
            raise Exception('Wrong number of input entity/relation!')
        with timeout(timeout_threshold):
            # A forced type overrides classification; list takes precedence
            # over bool, which takes precedence over count.
            forced_type = None
            for flag, type_id in ((force_list_query, 0),
                                  (force_bool_query, 1),
                                  (force_count_query, 2)):
                if flag:
                    forced_type = type_id
                    break

            queries, question_type, type_confidence = queryBuilder.generate_query(
                question, entities, relations, h1_threshold, forced_type)

            count_query = bool(question_type == 2)
            ask_query = not count_query and bool(question_type == 1)
            if count_query:
                question_type_str = "count"
            elif ask_query:
                question_type_str = "boolean"
            else:
                question_type_str = "list"

            rendered = []
            for item in queries:
                sparql = kb.sparql_query(item["where"],
                                         "?u_" + str(item["suggested_id"]),
                                         count_query, ask_query)
                rendered.append({
                    "query": sparql,
                    "confidence": item["confidence"]
                })

            result = {
                'queries': rendered,
                'type': question_type_str,
                'type_confidence': type_confidence
            }
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except RuntimeError as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422
Esempio n. 14
0
 def __get_generic_uri(self, uri, edges):
     """Return ``Uri.generic_uri(uri)``; ``edges`` is accepted but unused."""
     generic = Uri.generic_uri(uri)
     return generic
Esempio n. 15
0
 def __parse_answer(self, answer_type, raw_answer):
     """Wrap ``raw_answer`` in a ``Uri`` and pair it with ``answer_type``.

     When the knowledge base's prefix is non-empty and ``raw_answer``
     starts with it, that leading prefix is replaced by the shortened
     prefix first.
     """
     full_prefix = self.kb.prefix()
     prefix_length = len(full_prefix)
     if prefix_length > 0 and raw_answer.startswith(full_prefix):
         short_prefix = self.kb.shorten_prefix()
         raw_answer = short_prefix + raw_answer[prefix_length:]
     return answer_type, Uri(raw_answer, self.kb.parse_uri)
Esempio n. 16
0
def generate_query():
    """Generate SPARQL queries for the question in the JSON request body.

    Required fields: ``question``, ``entities``, ``relations``.  Optional
    fields: ``h1_threshold`` and ``timeout`` (default 9999999) and
    ``use_cache`` (default ``True``).

    Responses:
        * 400 when the request has no JSON body.
        * 201 with ``queries`` and ``type`` on success, also when served
          from the cache.
        * 408 with an empty JSON object when query generation raises.
    """
    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload.get('h1_threshold', 9999999))
    timeout_threshold = int(payload.get('timeout', 9999999))
    use_cache = bool(payload.get('use_cache', True))

    # NOTE(review): the cache key comes from the builtin hash(); if this runs
    # on Python 3 with hash randomisation enabled, keys differ between
    # processes, so a cache saved to disk may never be hit after a restart.
    # Confirm against the deployment.
    hash_key = hash(
        str(question) + str(raw_entities) + str(raw_relations) +
        str(h1_threshold))
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)
    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))

    relations = []
    for item in raw_relations:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        relations.append(LinkedItem(item["surface"], uris))

    try:
        with timeout(timeout_threshold):
            queries, question_type = queryBuilder.generate_query(
                question, entities, relations, h1_threshold)
            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True

            queries = [{
                "query":
                kb.sparql_query(item["where"],
                                "?u_" + str(item["suggested_id"]), count_query,
                                ask_query),
                "confidence":
                item["confidence"]
            } for item in queries]

            result = {'queries': queries, 'type': question_type_str}
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except Exception as expt:
        # Catch Exception rather than using a bare ``except`` so that
        # SystemExit and KeyboardInterrupt still propagate, and log the
        # failure instead of discarding it.  The HTTP response is the
        # same as before.
        logger.error(expt)
        return flask.jsonify({}), 408
Esempio n. 17
0
    # Point the run at the LC-QuAD data directory and disable CUDA.
    args.data = os.path.join(base_path, "data/lc_quad/")
    args.cuda = False

    # The parser supplies the knowledge base used to parse URIs below.
    parser = LC_QaudParser()
    kb = parser.kb

    # Make sure the classifier directory exists before building the
    # classifier from the "svm.model" path inside it.
    base_dir = "./output"
    question_type_classifier_path = os.path.join(base_dir, "question_type_classifier")
    utility.makedirs(question_type_classifier_path)
    question_type_classifier = SVMClassifier(os.path.join(question_type_classifier_path, "svm.model"))

    o = Orchestrator(None, question_type_classifier, None, parser, True)
    # Hand-written sample input: one entity with an empty surface form and
    # a single candidate URI with confidence 1.
    raw_entities = [{"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/resource/Bill_Finger"}]}]
    entities = []
    for item in raw_entities:
        uris = [Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"]]
        entities.append(LinkedItem(item["surface"], uris))

    # Two sample relations, each with one candidate URI with confidence 1.
    raw_relations = [{"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/ontology/creator"}]},
                     {"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/ontology/ComicsCharacter"}]}]

    relations = []
    for item in raw_relations:
        uris = [Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"]]
        relations.append(LinkedItem(item["surface"], uris))

    # Only the first element of the orchestrator's result is kept.
    question = "Which comic characters are painted by Bill Finger?"
    generated_queries = o.generate_query(question, entities, relations)[0]
    # print generated_queries
    # generated_queries = [
    #     {'where': [u'?u_0 <http://dbpedia.org/ontology/creator> <http://dbpedia.org/resource/Bill_Finger>',
Esempio n. 18
0
 def parse_sparql(self, raw_query):
     """Collect the URIs and variables used in ``raw_query``.

     Tokens of the form ``<...>`` and ``?name`` are wrapped in ``Uri``
     objects using the knowledge base's parser.

     Returns:
         ``(raw_query, True, uris)``: the query unchanged, a constant
         ``True`` flag, and the list of extracted ``Uri`` objects.
     """
     # Raw string literal: "\?" is an invalid escape sequence in a normal
     # string and triggers a warning on recent Python versions.
     tokens = re.findall(r'(<[^>]*>|\?[^ ]*)', raw_query)
     uris = [Uri(raw_uri, self.kb.parse_uri) for raw_uri in tokens]
     return raw_query, True, uris