コード例 #1
0
def right_subgraph_relation_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a "relation 3" variant of a right-subgraph question template.

    The function works on a deep copy of ``question_template``. It collects the old
    entities, the old properties and the first answer of the template query, then passes
    them with two candidate queries to
    ``questions_generator.relation_3_generation_common_part``.

    Returns the generated template when a candidate is found, otherwise the result of
    ``right_subgraph_relation_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    has_filter = 'filter' in sparql_query.lower()
    if not has_filter:
        old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1])
    else:
        old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
        # The second element is the filtered variable: select it in place of the answer
        # variable and keep the first value returned
        select_var = re.findall(r'SELECT (\?\w*) WHERE', sparql_query, re.IGNORECASE)[0]
        filtered_var = re.findall(r' (\?\w*) filter', sparql_query, re.IGNORECASE)[0]
        entity_query = sparql_query.replace(select_var, filtered_var, 1)
        entity_bindings = questions_generator.get_sparql_query_results(entity_query)['results']['bindings']
        entity_value = next(iter(entity_bindings[0].values()))['value']
        old_entities_ids.append(entity_value.split("/")[-1])
    old_properties_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1], True)
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    old_answer_id = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    # The |...| markers are placeholders; presumably they are filled in by the common part
    first_query = "select ?ans where {?sbj ?ans wd:" + old_answer_id + \
        " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    second_query_head = "select ?ans where {wd:" + old_answer_id + " ?ans ?obj . "
    if has_filter:
        second_query = second_query_head + "FILTER (|filter|?obj| && ?ans not in (wdt:|old_property_id|))} LIMIT 20"
    else:
        second_query = second_query_head + \
            "?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, old_properties_ids,
        [first_query, second_query])
    if found:
        right_subgraph_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: fall back to the "relation_2" strategy
    return right_subgraph_relation_2_generation(
        current_uid, generated_template, generated_questions, old_properties_ids, old_properties)
コード例 #2
0
def right_subgraph_2_relation_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a "relation 3" variant of a "right subgraph 2" question template.

    The function works on a deep copy of ``question_template``. Besides the entity found
    in the query, it resolves the intermediate variable of the query (by selecting it in
    place of the answer variable) and uses that value as the second old entity.

    Returns the generated template when a candidate is found, otherwise the result of
    ``right_subgraph_2_relation_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
    select_var = re.findall(r'SELECT (\?\w*) WHERE', sparql_query, re.IGNORECASE)[0]
    inner_var = re.findall(r'. (\?\w*) wdt:', sparql_query)[0]
    inner_query = sparql_query.replace(select_var, inner_var, 1)
    inner_bindings = questions_generator.get_sparql_query_results(inner_query)['results']['bindings']
    # Bindings are keyed by the variable name without its leading "?"
    modified_answer_entity = inner_bindings[0][inner_var[1:]]['value'].split("/")[-1]
    old_entities_ids.append(modified_answer_entity)
    old_properties_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1], True)
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    old_answer_id = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    modified_answer_filter, _ = questions_generator.get_filter_from_element(modified_answer_entity, "obj", "", False)
    old_answer_filter, _ = questions_generator.get_filter_from_element(old_answer_id, "sbj", "", False)
    # When a filter is available the value is matched through it, otherwise it is used as an entity
    first_query = (
        "select ?ans where {?sbj ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + modified_answer_filter + "?ans not in (wdt:|old_property_id|))} LIMIT 20"
        if modified_answer_filter else
        "select ?ans where {?sbj ?ans wd:" + modified_answer_entity + " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    )
    second_query = (
        "select ?ans where {?sbj ?ans ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + old_answer_filter + "?ans not in (wdt:|old_property_id|))} LIMIT 20"
        if old_answer_filter else
        "select ?ans where {wd:" + old_answer_id + " ?ans ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    )
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, old_properties_ids,
        [first_query, second_query])
    if found:
        right_subgraph_2_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: fall back to the "relation_2" strategy
    return right_subgraph_2_relation_2_generation(
        current_uid, generated_template, generated_questions, old_properties_ids, old_properties)
コード例 #3
0
def filter_questions(filename: str):
    """Keep only the questions of ``dataset/<filename>.json`` that have a usable answer.

    A question is kept when its SPARQL query returns a boolean result, or when at least
    one of its first 50 answers has a label according to ``convert_entity_qid_to_label``.
    Count questions ("Count_1" / "Count_2") whose result is "0" are discarded, as are
    questions whose query is malformed or returns nothing.

    While running, a checkpoint dictionary (``num_filtered_questions`` + ``questions``) is
    written to ``dataset/<filename>_filtered.json`` every 5000 elements, and an existing
    checkpoint is resumed from. At the end the file is overwritten with the questions list
    only, so a completed output file cannot be used as a checkpoint.
    """
    with open("dataset/" + filename + ".json", "r") as json_file:
        json_data = json.load(json_file)
    if os.path.exists("dataset/" + filename + "_filtered.json"):
        with open("dataset/" + filename + "_filtered.json", "r") as json_file:
            filtered_json_data = json.load(json_file)
    else:
        filtered_json_data = {"num_filtered_questions": -1, "questions": []}
    old_filtered_json_data_length = filtered_json_data['num_filtered_questions']
    for index, element in enumerate(json_data):
        # Skip already saved data
        if index <= old_filtered_json_data_length:
            continue
        if index % 100 == 0:
            print(index)
        # There are some questions considered malformed, which have to be skipped and so an empty list is returned.
        # A URLError is retried once; a second failure propagates to the caller
        try:
            results = questions_generator.get_sparql_query_results(element['sparql_wikidata'])
        except QueryBadFormed:
            results = []
        except URLError:
            results = questions_generator.get_sparql_query_results(element['sparql_wikidata'])
        if len(results) > 0:
            if 'boolean' in results:
                filtered_json_data['questions'].append(element)
            elif len(results['results']['bindings']) > 0 and ((element['template_id'] != "Count_1" and element['template_id'] != "Count_2") or \
                results['results']['bindings'][0]['value']['value'] != "0"):
                # Check if there is at least one English answer in cases different from count, otherwise check if the obtained number is different from 0
                answers = results['results']['bindings']
                # Check all answers, for a maximum of 50 elements, until it is found one with an English label, since getting labels is a slow process and there
                # could be questions with a lot of possible answers (I found one with more than 14,000 answers)
                answer = ""
                for binding in answers[:50]:
                    # Each binding is a dictionary variable -> value: iterate over the binding itself, not over the whole response
                    answer_iter = iter(binding.values())
                    answer = convert_entity_qid_to_label(next(answer_iter)['value'].split("/")[-1])
                    # Check if there is a second answer and has an English label (DeepPavlov always returns only a single answer). If an answer
                    # has already been found this check is useless
                    if not answer and (not element['subgraph'] or element['subgraph'] == "two intentions right subgraph"):
                        # The binding may hold a single variable: in that case there is nothing else to check
                        second_value = next(answer_iter, None)
                        if second_value is not None:
                            answer = convert_entity_qid_to_label(second_value['value'].split("/")[-1])
                    if answer:
                        break
                if answer:
                    filtered_json_data['questions'].append(element)
        filtered_json_data['num_filtered_questions'] += 1
        # Save temporary results
        if index % 5000 == 0 and index > 0:
            with open("dataset/" + filename + "_filtered.json", "w") as json_file:
                json.dump(filtered_json_data, json_file, indent=2, ensure_ascii=False)
    # Save only questions list
    with open("dataset/" + filename + "_filtered.json", "w") as json_file:
        json.dump(filtered_json_data["questions"], json_file, indent=2, ensure_ascii=False)
コード例 #4
0
def statement_property_2_relation_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a "relation 3" variant of a "statement property 2" question template.

    The function works on a deep copy of ``question_template``. It extracts the old
    entities, the statement ("p") and qualifier ("pq") properties and the first answer of
    the template query, then asks
    ``questions_generator.relation_3_generation_common_part`` for replacement properties
    using two candidate queries.

    Returns the generated template when a candidate is found, otherwise the result of
    ``statement_property_2_relation_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    # Without a filter the last element of the query is an entity
    has_filter = 'filter' in sparql_query.lower()
    if has_filter:
        old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
        # The single entity is used for both properties
        old_entities_ids.append(old_entities_ids[0])
    else:
        old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1])
    statement_property_id = questions_generator.get_specific_elements_from_query(sparql_query, [0], "p", "P")[0]
    qualifier_property_id = questions_generator.get_specific_elements_from_query(sparql_query, [0], "pq", "P")[0]
    # Inverted order: in these queries the first entity is linked to the second property and
    # the second entity is linked to the first property
    old_properties_ids = [qualifier_property_id, statement_property_id]
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    old_answer = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    # Filter and type of the old answer
    old_answer_filter, element_type = questions_generator.get_filter_from_element(old_answer, "obj", "s")
    # The first query does not depend on the presence of a filter in the original query
    if element_type != questions_generator.ElementType.entity:
        # The answer is not an entity
        first_query = "SELECT distinct ?ans WHERE {?sbj ?rel ?s . ?s ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + old_answer_filter + \
            " && ?ans not in (pq:|old_property_id|))} LIMIT 10"
    else:
        first_query = "select distinct ?ans where {?sbj ?rel ?s . ?s ?ans wd:" + old_answer + " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + \
            old_answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 20"
    if has_filter:
        # Resolve the filtered value by selecting its variable in place of the answer variable
        select_var = re.findall(r'SELECT (\?\w*) WHERE', sparql_query, re.IGNORECASE)[0]
        filtered_var = re.findall(old_properties_ids[1] + r' (\?\w*) filter', sparql_query, re.IGNORECASE)[0]
        filtered_value_query = sparql_query.replace(select_var, filtered_var, 1)
        filtered_bindings = questions_generator.get_sparql_query_results(filtered_value_query)['results']['bindings']
        old_qualifier_value = next(iter(filtered_bindings[0].values()))['value'].split("/")[-1]
        old_qualifier_filter, _ = questions_generator.get_filter_from_element(old_qualifier_value, "x", "s")
        # The value is not an entity, so candidates link the known entity to a value of the same
        # type as the original one (the type is expressed by the filter). Only "p"-prefixed
        # properties are excluded/accepted here
        second_query = "SELECT distinct ?ans WHERE { ?sbj ?ans ?s . ?s ?rel2 ?x . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + old_qualifier_filter + \
            " && ?ans not in (p:|old_property_id|)) } LIMIT 10"
    else:
        # With a plain entity the second query starts directly from that entity
        second_query = "select distinct ?ans where {wd:" + old_entities_ids[0] + " ?ans ?s . ?s ?rel2 ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER " + \
            "(REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (p:|old_property_id|))} LIMIT 20"
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, old_properties_ids,
        [first_query, second_query],
        ["pq", ""])
    if found:
        statement_property_2_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: fall back to the "relation_2" strategy
    return statement_property_2_relation_2_generation(
        current_uid, generated_template, generated_questions, old_properties_ids, old_properties)
コード例 #5
0
def simple_question_left_template_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Tuple[Dict[str, Any], str, str, str, str]:
    """Generate a brand new "simple question left" template from a random Wikidata entity.

    A random entity is used as the object of the question. One of the subjects linked to
    it is picked at random and the resulting query is accepted only if its first answer is
    that subject. The whole procedure is repeated until a valid question is produced, so
    the call only returns on success.

    ``current_uid`` and ``generated_questions`` are not used by this function.

    Returns a 5-tuple: the generated template (a deep copy of ``question_template`` with
    ``sparql_wikidata``, ``NNQT_question``, ``question`` and ``paraphrased_question``
    filled in), the id and name of the random entity, and the id and name of the property.
    """
    generated_template = deepcopy(question_template)
    # There could be properties without an English label, in that case repeat the whole operation; this should be a very rare case
    while True:
        # A generated entity possibly couldn't have properties, although it should happen rarely: in that case a new random entity is obtained
        while True:
            # The random entity is the object of the question; the subject is extracted by the query together with its English label
            first_entity_id, first_entity = wikidata_ids_extractor.get_random_wikidata_entity_from_all()
            possible_questions_data = questions_generator.get_sparql_query_results(
                "select distinct ?rel ?sbj ?sbjLabel ?objio ?objioLabel where {" +
                "?sbj ?rel wd:" + first_entity_id + " . ?sbj wdt:P31 ?objio . ?sbj rdfs:label ?sbjLabel . ?objio rdfs:label ?objioLabel ." +
                " FILTER (LANG(?sbjLabel) = \"en\" && LANG(?objioLabel) = \"en\")} LIMIT 10")
            candidates = possible_questions_data['results']['bindings']
            if candidates:
                break
        question_data = random.choice(candidates)
        sbj_entity_id = question_data['sbj']['value'].split('/')[-1]
        rel_property_id = question_data['rel']['value'].split('/')[-1]
        objio_entity_id = question_data['objio']['value'].split('/')[-1]
        try:
            # Get relation and type English names
            rel_property = questions_generator.get_entity_name_from_wikidata_id(rel_property_id)
            objio_entity = question_data['objioLabel']['value'].replace("_", " ")
            # Verify if the first answer got with the query is the subject found during question generation. If the query has a lot of results
            # it is probable that the expected subject will not be in the first position
            generated_template['sparql_wikidata'] = "select distinct ?sbj where { ?sbj wdt:" + rel_property_id + " wd:" + first_entity_id + " . ?sbj wdt:P31 wd:" + objio_entity_id + \
                " . ?sbj rdfs:label ?sbjLabel . FILTER (LANG(?sbjLabel) = \"en\") } LIMIT 5"
            check_bindings = questions_generator.get_sparql_query_results(generated_template['sparql_wikidata'])['results']['bindings']
            sparql_result = next(iter(check_bindings[0].values()))['value'].split("/")[-1]
            if sparql_result == sbj_entity_id:
                break
        except Exception:
            # Deliberate best effort: any failure just triggers a new attempt. Only Exception is
            # caught so that KeyboardInterrupt/SystemExit can still stop this endless loop
            continue
    nnqt_question = "What is the {" + objio_entity + "} for {" + rel_property + "} of {" + first_entity + "}"
    generated_template['NNQT_question'] = nnqt_question
    generated_template['question'] = nnqt_question
    generated_template['paraphrased_question'] = nnqt_question
    return generated_template, first_entity_id, first_entity, rel_property_id, rel_property
コード例 #6
0
def check_original_questions(set_name: str):
    """Report generated questions whose original (filtered) question has no usable answer.

    For every question in ``dataset/<set_name>_generated.json`` the first question of
    ``dataset/<set_name>_filtered.json`` with the same ``template_id``, ``template_index``
    and ``subgraph`` is taken as its origin and its SPARQL query is executed. The pair
    ``(filtered uid, generated uid)`` is printed when the query is malformed or returns
    nothing, when a count question returns "0", or when none of the first 50 answers has a
    label according to ``convert_entity_qid_to_label``. Boolean (ASK) results always count
    as answered.
    """
    # Load filtered questions
    with open("dataset/" + set_name + "_filtered.json", "r") as json_file:
        filtered_questions = json.load(json_file)
    # Load generated questions
    with open("dataset/" + set_name + "_generated.json", "r") as json_file:
        generated_questions = json.load(json_file)
    for generated_question in generated_questions:
        for filtered_question in filtered_questions:
            # For the check we could use respectively only "old_sparql_wikidata" and "sparql_wikidata", but that choice should be longer to execute than the actual one.
            # This is the reason of the actual checks order, since usually "template_id" and "template_index" values are very short and variable
            if not (generated_question['template_id'] == filtered_question['template_id'] and
                    generated_question['template_index'] == filtered_question['template_index'] and
                    generated_question['subgraph'] == filtered_question['subgraph']):
                continue
            # There are some questions considered malformed, which have to be skipped and so an empty list is returned.
            # A URLError is retried once; a second failure propagates to the caller
            try:
                results = questions_generator.get_sparql_query_results(filtered_question['sparql_wikidata'])
            except QueryBadFormed:
                results = []
            except URLError:
                results = questions_generator.get_sparql_query_results(filtered_question['sparql_wikidata'])
            if len(results) == 0:
                # There aren't answers
                has_answer = False
            elif 'boolean' in results:
                # ASK queries have no "results" section: the boolean itself is the answer
                has_answer = True
            else:
                answers = results['results']['bindings']
                is_count = filtered_question['template_id'] == "Count_1" or filtered_question['template_id'] == "Count_2"
                if is_count and len(answers) > 0 and answers[0]['value']['value'] == "0":
                    # The question is a count and the answer is 0
                    has_answer = False
                else:
                    # Check all answers, for a maximum of 50 elements, until it is found one with an English label, since getting labels is a slow process and there
                    # could be questions with a lot of possible answers (I found one with more than 14,000 answers)
                    answer = ""
                    for binding in answers[:50]:
                        # Each binding is a dictionary variable -> value: iterate over the binding itself, not over the whole response
                        answer_iter = iter(binding.values())
                        answer = convert_entity_qid_to_label(next(answer_iter)['value'].split("/")[-1])
                        # Check if there is a second answer and has an English label (DeepPavlov always returns only a single answer). If an answer
                        # has already been found this check is useless
                        if not answer and (not filtered_question['subgraph'] or filtered_question['subgraph'] == "two intentions right subgraph"):
                            # The binding may hold a single variable: in that case there is nothing else to check
                            second_value = next(answer_iter, None)
                            if second_value is not None:
                                answer = convert_entity_qid_to_label(second_value['value'].split("/")[-1])
                        if answer:
                            break
                    # With no bindings at all, or with no labelled answer, the question is reported
                    has_answer = bool(answer)
            if not has_answer:
                print("(" + str(filtered_question['uid']) + ", " + str(generated_question['uid']) + ")")
            # Only the first matching filtered question is considered, which avoids duplicates
            break
コード例 #7
0
def two_intentions_right_subgraph_relation_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a "relation 3" variant of a "two intentions right subgraph" template.

    The function works on a deep copy of ``question_template``. The first two values of
    the first result row of the template query are taken as the two old answers, and one
    candidate query per answer is passed to
    ``questions_generator.relation_3_generation_common_part``.

    Returns the generated template when a candidate is found, otherwise the result of
    ``two_intentions_right_subgraph_relation_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1])
    old_properties_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1], True)
    first_row = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings'][0]
    row_values = iter(first_row.values())
    old_answer_id_1 = next(row_values)['value'].split("/")[-1]
    old_answer_id_2 = next(row_values)['value'].split("/")[-1]
    # Same query for both answers, only the answer id changes
    candidate_queries = [
        "select ?ans where {?sbj ?ans wd:" + answer_id + " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
        for answer_id in (old_answer_id_1, old_answer_id_2)
    ]
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, old_properties_ids,
        candidate_queries)
    if found:
        two_intentions_right_subgraph_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: fall back to the "relation_2" strategy
    return two_intentions_right_subgraph_relation_2_generation(
        current_uid, generated_template, generated_questions, old_properties_ids, old_properties)
コード例 #8
0
def two_intentions_right_subgraph_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity 3" variant of a "two intentions right subgraph" template.

    The function works on a deep copy of ``question_template``. The first two values of
    the first result row of the template query are taken as the two old answers, and one
    candidate-entity query per answer is passed to
    ``questions_generator.entity_3_generation_common_part``.

    Returns the generated template when a candidate is found, otherwise the result of
    ``two_intentions_right_subgraph_entity_2_generation``, which receives the original
    ``question_template`` rather than the copy.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1])
    first_row = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings'][0]
    row_values = iter(first_row.values())
    old_answer_id_1 = next(row_values)['value'].split("/")[-1]
    old_answer_id_2 = next(row_values)['value'].split("/")[-1]
    # Same query for both answers, only the answer id changes
    candidate_queries = [
        "select distinct ?ans ?ansLabel where {?ans ?rel wd:" + answer_id +
        " . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . " +
        "FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"
        for answer_id in (old_answer_id_1, old_answer_id_2)
    ]
    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, candidate_queries)
    if found:
        two_intentions_right_subgraph_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: try with a random entity of the same type or class
    return two_intentions_right_subgraph_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids, old_entities)
コード例 #9
0
def unknown_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity 3" variant of an "unknown" question template.

    The function works on a deep copy of ``question_template``. Unlike the other
    "entity 3" generators, the candidate query is derived from the answer query itself:
    the known entity is replaced by ``?ans`` and the resulting graph pattern is used inside
    a ``NOT EXISTS`` clause to exclude entities that would return results.

    Returns the generated template when a candidate is found, otherwise the result of
    ``unknown_entity_2_generation``, which receives the original ``question_template``
    rather than the copy.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
    # Generalize the query by turning the known entity into the ?ans variable, then keep only
    # the text between the outermost "{" and "}"
    generalized_query = sparql_query.replace("wd:" + old_entities_ids[0], "?ans")
    general_pattern = re.findall("{(.+)}", generalized_query)[0]
    type_common_string = "?ans wdt:|rel_entity_type| wd:|entity_type|"
    # Alternative query, more complete and closer to the "entity_3" logic, but slow and unsafe
    # since it sometimes raises a server error:
    #   "SELECT distinct ?ans ?ansLabel WHERE {" + type_common_string + " . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" " +
    #   "&& NOT EXISTS {" + type_common_string + " . " + general_pattern + "})} LIMIT 20"
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    old_answer = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    # Only the filter of the old answer is needed, its type is ignored
    old_answer_filter, _ = questions_generator.get_filter_from_element(old_answer, "obj", "s", False)
    query = "".join([
        "SELECT distinct ?ans ?ansLabel WHERE {", type_common_string,
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" && ",
        old_answer_filter, "NOT EXISTS {", type_common_string, " . ", general_pattern, "})} LIMIT 5",
    ])
    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, [query])
    if found:
        unknown_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: try with a random entity of the same type or class
    return unknown_entity_2_generation(current_uid, question_template, generated_questions, old_entities_ids, old_entities)
def string_matching_simple_contains_word_relation_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a "relation 3" variant of a "string matching simple contains word" template.

    The function works on a deep copy of ``question_template``. It prints the template
    query, takes the first answer of that query and passes a single candidate query
    starting from that answer to
    ``questions_generator.relation_3_generation_common_part``.

    Returns the generated template when a candidate is found, otherwise the result of
    ``string_matching_simple_contains_word_relation_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
    old_properties_ids = questions_generator.get_elements_from_query(sparql_query, [0], True)
    print(sparql_query)
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    first_answer = next(iter(answer_bindings[0].values()))
    old_answer_id = first_answer['value'].split("/")[-1]
    candidate_query = "select ?ans where {wd:" + old_answer_id + \
        " ?ans ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, old_properties_ids,
        [candidate_query])
    if found:
        string_matching_simple_contains_word_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: fall back to the "relation_2" strategy
    return string_matching_simple_contains_word_relation_2_generation(
        current_uid, generated_template, generated_questions, old_properties_ids, old_properties)
コード例 #11
0
def center_2_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity 3" variant of a "center 2" question template.

    The function works on a deep copy of ``question_template``. Besides the first answer
    of the template query, it resolves the last variable of the query (by selecting it in
    place of the answer variable) and derives a filter from that value. The filter, when
    present, shapes the candidate query passed to
    ``questions_generator.entity_3_generation_common_part``.

    Returns the generated template when a candidate is found, otherwise the result of
    ``center_2_entity_2_generation``, which receives the original ``question_template``
    rather than the copy.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(sparql_query, [0])
    answer_bindings = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings']
    old_answer_id = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    select_var = re.findall(r'DISTINCT (\?\w*) WHERE', sparql_query, re.IGNORECASE)[0]
    last_var = re.findall(r' (\?\w*)}', sparql_query)[0]
    last_var_query = sparql_query.replace(select_var, last_var, 1)
    last_var_bindings = questions_generator.get_sparql_query_results(last_var_query)['results']['bindings']
    # Bindings are keyed by the variable name without its leading "?"
    modified_answer_entity = last_var_bindings[0][last_var[1:]]['value'].split("/")[-1]
    modified_answer_filter, _ = questions_generator.get_filter_from_element(modified_answer_entity, "obj", "", False)
    label_clause = " . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . "
    if not modified_answer_filter:
        query = "select ?ans ?ansLabel where {?ans ?rel wd:" + old_answer_id + label_clause + \
            "FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"
    else:
        query = "select ?ans ?ansLabel where {?ans ?rel ?obj" + label_clause + \
            "FILTER (" + modified_answer_filter + "LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"
    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, generated_template, generated_questions, old_entities_ids, [query])
    if found:
        center_2_nnqt_question_construction(generated_template)
        return generated_template
    # No valid candidates: try with a random entity of the same type or class
    return center_2_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids, old_entities)
コード例 #12
0
def get_sports_json():
    """Download the instances of ``wd:Q31629`` and save them to ``entities_and_properties/Sports.json``.

    Every result with an English label becomes a ``{"title": <lower-cased label>, "q": <id>}``
    entry; the entries are wrapped in the nested ``{"*": [{"a": {"*": [...]}}]}`` structure
    before being dumped.
    """
    sports_query = (
        "select ?sbj ?sbjLabel {{ select ?sbj ?sbjLabel where {?sbj wdt:P31 wd:Q31629 . "
        "SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". }}} FILTER (LANG(?sbjLabel) = \"en\") }"
    )
    bindings = questions_generator.get_sparql_query_results(sports_query)['results']['bindings']
    # The id is the last segment of the entity URI
    sports_list = [
        {"title": row['sbjLabel']['value'].lower(), "q": row['sbj']['value'].split('/')[-1]}
        for row in bindings
    ]
    with open("entities_and_properties/Sports.json", "w") as sport_json_file:
        json.dump({"*": [{"a": {"*": sports_list}}]}, sport_json_file)
コード例 #13
0
def statement_property_nnqt_question_construction(generated_template: Dict[str, Any]):
    """Recreate the NNQT question of a "statement property" template.

    The decision is based on ``generated_template['old_sparql_wikidata']``. Without a
    filter the question is rebuilt from two entities. With a filter the filtered value is
    resolved by querying the endpoint and passed to
    ``questions_generator.recreate_nnqt_question`` as a fixed element.
    """
    sparql_query = generated_template['old_sparql_wikidata']
    if 'filter' not in sparql_query.lower():
        questions_generator.recreate_nnqt_question(
            generated_template, "What is |property_0| of |entity_0| that is |property_1| is |entity_1| ?", [0, 1], [0], False)
        return
    # Resolve the filtered value by selecting its variable in place of the answer variable
    select_var = re.findall(r'SELECT (\?\w*) WHERE', sparql_query, re.IGNORECASE)[0]
    filtered_var = re.findall(r' (\?\w*) filter', sparql_query, re.IGNORECASE)[0]
    value_query = sparql_query.replace(select_var, filtered_var, 1)
    value_bindings = questions_generator.get_sparql_query_results(value_query)['results']['bindings']
    fixed_entity = next(iter(value_bindings[0].values()))['value'].split("/")[-1]
    # For dates keep only the year-month-day part
    if questions_generator.get_element_type(fixed_entity) == questions_generator.ElementType.date:
        fixed_entity = fixed_entity.split("T")[0]
    questions_generator.recreate_nnqt_question(
        generated_template, "What is |property_0| of |entity_0| that is |property_1| is |element_0| ?", [0], [0], False,
        fixed_entities=[fixed_entity])
コード例 #14
0
def unknown_2_relation_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "unknown 2" question by replacing its relations ("relation_3" logic).

    Works on a deep copy of ``question_template``. Three candidate queries (one
    for the ``p:`` property, two for the ``pq:`` qualifier properties) are built
    and handed to ``questions_generator.relation_3_generation_common_part``.

    Args:
        current_uid: Passed through to the common generation routine.
        question_template: Source template; must contain ``sparql_wikidata``.
        generated_questions: Passed through to the common generation routine.

    Returns:
        The copied template after ``unknown_2_nnqt_question_construction`` when
        candidates were found; otherwise the result of
        ``unknown_2_relation_2_generation``.
    """

    def build_qualifier_query(answer_id: str, answer_filter: str, answer_type) -> str:
        # The two qualifier queries are identical except for the answer they refer to
        if answer_type == questions_generator.ElementType.entity:
            return ("select distinct ?ans where {?sbj ?rel ?s . ?s ?ans wd:" + answer_id
                    + " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER ("
                    + answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 20")
        # If the answer is not an entity, the query results link the known entity to a value of the
        # same type of the original one: the type is defined through the corresponding filter
        return ("SELECT distinct ?ans WHERE {?sbj ?rel ?s . ?s ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER ("
                + answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 10")

    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']

    entity_ids = questions_generator.get_elements_from_query(sparql_query, [0, 1])
    # Entities are taken in reverse order, and the (new) first one is repeated at the end
    # because it is used for the last two properties
    entity_ids.reverse()
    entity_ids.append(entity_ids[0])

    # One "p" property followed by the two "pq" qualifier properties
    property_ids = questions_generator.get_specific_elements_from_query(sparql_query, [0], "p", "P")
    property_ids += questions_generator.get_specific_elements_from_query(sparql_query, [0, 1], "pq", "P")

    # Answers are inverted: the first returned value is answer 2, the second is answer 1
    binding_values = iter(questions_generator.get_sparql_query_results(sparql_query)['results']['bindings'][0].values())
    answer_2_id = next(binding_values)['value'].split("/")[-1]
    answer_1_id = next(binding_values)['value'].split("/")[-1]

    # Find answers filter and type
    answer_1_filter, answer_1_type = questions_generator.get_filter_from_element(answer_1_id, "obj", "s")
    answer_2_filter, answer_2_type = questions_generator.get_filter_from_element(answer_2_id, "obj", "s")

    first_query = ("select distinct ?ans where {wd:" + entity_ids[1]
                   + " ?ans ?s . ?s ?rel2 ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER "
                   + "(REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (p:|old_property_id|))} LIMIT 20")
    second_query = build_qualifier_query(answer_1_id, answer_1_filter, answer_1_type)
    third_query = build_qualifier_query(answer_2_id, answer_2_filter, answer_2_type)

    # The second and third queries accept only properties that are "qualifiers", so that are
    # represented with the "pq" prefix
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, generated_template, generated_questions, entity_ids, property_ids,
        [first_query, second_query, third_query], ["", "pq", "pq"])

    if found:
        unknown_2_nnqt_question_construction(generated_template)
        return generated_template

    # There aren't valid candidates, so try with "relation_2" function
    # NOTE(review): the copied template (not question_template) is forwarded here -- confirm this is intended
    return unknown_2_relation_2_generation(current_uid, generated_template, generated_questions, property_ids, old_properties)
def string_matching_simple_contains_word_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate a "string matching / contains word" question by replacing its entity ("entity_3" logic).

    Works on a deep copy of ``question_template``. A single candidate query,
    anchored on the first answer of the original query, is handed to
    ``questions_generator.entity_3_generation_common_part``.

    Args:
        current_uid: Passed through to the common generation routine.
        question_template: Source template; must contain ``sparql_wikidata``.
        generated_questions: Passed through to the common generation routine.

    Returns:
        The copied template after
        ``string_matching_simple_contains_word_nnqt_question_construction`` when
        candidates were found; otherwise the result of
        ``string_matching_simple_contains_word_entity_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(sparql_query, [0])

    # Identifier of the original answer: last path segment of the first value in the first binding
    first_binding = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings'][0]
    answer_id = next(iter(first_binding.values()))['value'].split("/")[-1]

    # Candidates: English-labelled elements linked from the original answer, excluding the old entity
    candidate_query = ("select ?ans ?ansLabel where {wd:" + answer_id
                       + " ?rel ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . "
                       + "FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20")
    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, generated_template, generated_questions, entity_ids, [candidate_query])

    if found:
        string_matching_simple_contains_word_nnqt_question_construction(generated_template)
        return generated_template

    # There aren't valid candidates, so try with a random entity of the same type or class
    return string_matching_simple_contains_word_entity_2_generation(
        current_uid, question_template, generated_questions, entity_ids, old_entities)
コード例 #16
0
def statement_property_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate a "statement property" question by replacing its entity ("entity_3" logic).

    Works on a deep copy of ``question_template``. The original query is
    generalized (its first entity becomes ``?ans``) and embedded in a
    ``NOT EXISTS`` clause of the first candidate query. When the original query
    has no ``filter``, a second candidate query linking the first known entity
    through a statement node is added.

    Args:
        current_uid: Passed through to the common generation routine.
        question_template: Source template; must contain ``sparql_wikidata``.
        generated_questions: Passed through to the common generation routine.

    Returns:
        The copied template after ``statement_property_nnqt_question_construction``
        when candidates were found; otherwise the result of
        ``statement_property_entity_2_generation``.
    """
    generated_template = deepcopy(question_template)
    sparql_query = generated_template['sparql_wikidata']

    # Check if there is a filter: if not so the last element is an entity, and two entities are read
    has_filter = 'filter' in sparql_query.lower()
    entity_positions = [0] if has_filter else [0, 1]
    entity_ids = questions_generator.get_elements_from_query(sparql_query, entity_positions)

    # In this case the procedure is different from normal: it takes the answer query, modifies it to
    # obtain all entities of the same type or class of the original answer that return results, and
    # then uses this list to exclude these entities from possible candidates
    generalized_query = sparql_query.replace("wd:" + entity_ids[0], "?ans")
    # Get the substring between "{" and "}"
    generalized_body = re.findall("{(.+)}", generalized_query)[0]
    type_constraint = "?ans wdt:|rel_entity_type| wd:|entity_type|"

    # Disabled earlier form of the first query, kept for reference:
    #   "SELECT distinct ?ans ?ansLabel WHERE {" + type_constraint + " . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" "
    #   "&& NOT EXISTS {" + type_constraint + " . " + generalized_body + "})} LIMIT 20"
    # This version of the query is more complete and adapted for "entity_3" logic, but is also slow
    # and unsafe, since sometimes raises a server error
    first_binding = questions_generator.get_sparql_query_results(sparql_query)['results']['bindings'][0]
    answer_id = next(iter(first_binding.values()))['value'].split("/")[-1]

    # Find answer filter (the element type returned alongside it is not needed here)
    answer_filter, _ = questions_generator.get_filter_from_element(answer_id, "obj", "s", False)

    # NOTE(review): no "&&" is inserted between answer_filter and "NOT EXISTS"; presumably the filter
    # returned with the extra False argument already ends with the connective or is empty -- confirm
    candidate_queries = ["".join([
        "SELECT distinct ?ans ?ansLabel WHERE {", type_constraint,
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" && ",
        answer_filter, "NOT EXISTS {", type_constraint, " . ", generalized_body, "})} LIMIT 5",
    ])]
    if not has_filter:
        # In this case there is an additional normal query with two triples that link the first known
        # entity to the second one
        candidate_queries.append(
            "select ?ans ?ansLabel where {wd:" + entity_ids[0]
            + " ?rel ?s . ?s ?rel2 ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . "
            + "FILTER (LANG(?ansLabel) = \"en\" && REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (wd:|old_entity_id|))} LIMIT 20")

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, generated_template, generated_questions, entity_ids, candidate_queries)

    if found:
        statement_property_nnqt_question_construction(generated_template)
        return generated_template

    # There aren't valid candidates, so try with a random entity of the same type or class
    return statement_property_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)