def right_subgraph_2_relation_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate a "relation_3" variant of a right-subgraph question that uses two properties.

    The function works on a deep copy of ``question_template``. It reads the copy's
    ``'sparql_wikidata'`` query, collects the entities and properties used there, builds two
    candidate-property queries and hands everything to
    ``questions_generator.relation_3_generation_common_part``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``right_subgraph_2_nnqt_question_construction`` has been applied
        to it, or the result of ``right_subgraph_2_relation_2_generation`` when the common part
        reports that nothing was found.
    """
    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(query, [0])

    # Swap the selected variable with the first variable that precedes a "wdt:" property, then run
    # the rewritten query to learn which entity that variable is bound to
    select_var = re.findall(r'SELECT (\?\w*) WHERE', query, re.IGNORECASE)[0]
    bridge_var = re.findall(r'. (\?\w*) wdt:', query)[0]
    bridge_query = query.replace(select_var, bridge_var, 1)
    bridge_bindings = questions_generator.get_sparql_query_results(bridge_query)['results']['bindings']
    bridge_entity = bridge_bindings[0][bridge_var[1:]]['value'].split("/")[-1]
    entity_ids.append(bridge_entity)

    property_ids = questions_generator.get_elements_from_query(query, [0, 1], True)

    # The original answer is the first bound value of the first result row
    answer_bindings = questions_generator.get_sparql_query_results(query)['results']['bindings']
    answer_id = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]

    bridge_filter, _ = questions_generator.get_filter_from_element(bridge_entity, "obj", "", False)
    answer_filter, _ = questions_generator.get_filter_from_element(answer_id, "sbj", "", False)

    # When a filter is available the element is matched through it, otherwise it is referenced directly as "wd:<id>".
    # NOTE(review): the |...| placeholders are presumably substituted by the common-part helper - confirm
    first_query = (
        "select ?ans where {?sbj ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + bridge_filter + "?ans not in (wdt:|old_property_id|))} LIMIT 20"
        if bridge_filter else
        "select ?ans where {?sbj ?ans wd:" + bridge_entity + " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    )
    second_query = (
        "select ?ans where {?sbj ?ans ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + answer_filter + "?ans not in (wdt:|old_property_id|))} LIMIT 20"
        if answer_filter else
        "select ?ans where {wd:" + answer_id + " ?ans ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (?ans not in (wdt:|old_property_id|))} LIMIT 20"
    )

    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, property_ids, [first_query, second_query])
    if found:
        right_subgraph_2_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so fall back to the "relation_2" function
    return right_subgraph_2_relation_2_generation(current_uid, template_copy, generated_questions, property_ids, old_properties)
def statement_property_2_relation_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate a "relation_3" variant of a statement-property question that uses two properties.

    The function works on a deep copy of ``question_template``. Depending on whether the copy's
    ``'sparql_wikidata'`` query contains the word "filter" (case-insensitive), it collects one or
    two entities, builds two candidate-property queries and passes them to
    ``questions_generator.relation_3_generation_common_part`` with the prefixes ``["pq", ""]``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``statement_property_2_nnqt_question_construction`` has been
        applied to it, or the result of ``statement_property_2_relation_2_generation`` when the
        common part reports that nothing was found.
    """
    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']

    # Without a filter the last element of the query is an entity
    has_filter = 'filter' in query.lower()
    if has_filter:
        entity_ids = questions_generator.get_elements_from_query(query, [0])
        # A single entity serves both properties
        entity_ids.append(entity_ids[0])
    else:
        entity_ids = questions_generator.get_elements_from_query(query, [0, 1])

    # The order is inverted ("pq" before "p") because in these queries the first entity is linked to the
    # second property and the second entity is linked to the first property
    p_property = questions_generator.get_specific_elements_from_query(query, [0], "p", "P")[0]
    pq_property = questions_generator.get_specific_elements_from_query(query, [0], "pq", "P")[0]
    property_ids = [pq_property, p_property]

    # The original answer is the first bound value of the first result row
    answer_bindings = questions_generator.get_sparql_query_results(query)['results']['bindings']
    answer = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    answer_filter, answer_type = questions_generator.get_filter_from_element(answer, "obj", "s")

    # The first query does not depend on the presence of a filter in the original query
    if answer_type == questions_generator.ElementType.entity:
        first_query = "select distinct ?ans where {?sbj ?rel ?s . ?s ?ans wd:" + answer + \
            " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 20"
    else:
        # The answer is not an entity, so it is matched only through its filter
        first_query = "SELECT distinct ?ans WHERE {?sbj ?rel ?s . ?s ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + \
            answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 10"

    if has_filter:
        # Retrieve the value bound to the filtered variable by selecting it in place of the answer variable
        select_var = re.findall(r'SELECT (\?\w*) WHERE', query, re.IGNORECASE)[0]
        filtered_var = re.findall(property_ids[1] + r' (\?\w*) filter', query, re.IGNORECASE)[0]
        filtered_var_query = query.replace(select_var, filtered_var, 1)
        filtered_bindings = questions_generator.get_sparql_query_results(filtered_var_query)['results']['bindings']
        filtered_value = next(iter(filtered_bindings[0].values()))['value'].split("/")[-1]
        filtered_value_filter, _ = questions_generator.get_filter_from_element(filtered_value, "x", "s")
        # Here the results link the known entity to a value of the same type as the original one, the type being
        # expressed by the corresponding filter; only properties carrying the "p" prefix are accepted
        second_query = "SELECT distinct ?ans WHERE { ?sbj ?ans ?s . ?s ?rel2 ?x . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + \
            filtered_value_filter + " && ?ans not in (p:|old_property_id|)) } LIMIT 10"
    else:
        # With no filter the second query starts directly from the first known entity
        second_query = "select distinct ?ans where {wd:" + entity_ids[0] + \
            " ?ans ?s . ?s ?rel2 ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (p:|old_property_id|))} LIMIT 20"

    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, property_ids, [first_query, second_query], ["pq", ""])
    if found:
        statement_property_2_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so fall back to the "relation_2" function
    return statement_property_2_relation_2_generation(current_uid, template_copy, generated_questions, property_ids, old_properties)
Example #3
0
def unknown_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of an "unknown" question.

    The function works on a deep copy of ``question_template``. It turns the copy's
    ``'sparql_wikidata'`` query into a graph pattern over ``?ans`` and builds one candidate-entity
    query that excludes, through ``NOT EXISTS``, the entities matching that pattern. The query is
    passed to ``questions_generator.entity_3_generation_common_part``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``unknown_nnqt_question_construction`` has been applied to it, or
        the result of ``unknown_entity_2_generation`` (called with the original, uncopied
        template) when the common part reports that nothing was found.
    """
    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(query, [0])

    # Unlike the usual procedure, the answer query itself is generalized: the known entity becomes "?ans", and the
    # pattern found between "{" and "}" is then used to exclude from the candidates the entities that return results
    generalized_query = query.replace("wd:" + entity_ids[0], "?ans")
    inner_pattern = re.findall("{(.+)}", generalized_query)[0]
    type_clause = "?ans wdt:|rel_entity_type| wd:|entity_type|"

    # NOTE: the original code kept, as a disabled string, a variant of the query below without the "?ans ?rel ?obj"
    # triple and the answer filter (and with LIMIT 20). The accompanying remark describes it as more complete and
    # better adapted to the "entity_3" logic, but slow and unsafe since it sometimes raises a server error

    # The original answer is the first bound value of the first result row
    answer_bindings = questions_generator.get_sparql_query_results(query)['results']['bindings']
    answer = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    answer_filter, _ = questions_generator.get_filter_from_element(answer, "obj", "s", False)

    candidate_query = "".join([
        "SELECT distinct ?ans ?ansLabel WHERE {", type_clause,
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" && ",
        answer_filter,
        "NOT EXISTS {", type_clause, " . ", inner_pattern, "})} LIMIT 5",
    ])

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, [candidate_query])
    if found:
        unknown_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so try with a random entity of the same type or class
    return unknown_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)
Example #4
0
def unknown_2_relation_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate a "relation_3" variant of an "unknown" question that uses two qualifier properties.

    The function works on a deep copy of ``question_template``. It collects two entities, one "p"
    property and two "pq" properties from the copy's ``'sparql_wikidata'`` query, builds three
    candidate-property queries and passes them to
    ``questions_generator.relation_3_generation_common_part`` with the prefixes ``["", "pq", "pq"]``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``unknown_2_nnqt_question_construction`` has been applied to it,
        or the result of ``unknown_2_relation_2_generation`` when the common part reports that
        nothing was found.
    """
    def build_qualifier_query(answer: str, answer_filter: str, is_entity: bool) -> str:
        # Both qualifier queries share this shape; only the answer and its filter change
        if is_entity:
            return "select distinct ?ans where {?sbj ?rel ?s . ?s ?ans wd:" + answer + \
                " . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 20"
        # If the answer is not an entity, the results link the known entity to a value of the same type as the
        # original one: the type is defined through the corresponding filter
        return "SELECT distinct ?ans WHERE {?sbj ?rel ?s . ?s ?ans ?obj . ?sbj wdt:|rel_entity_type| wd:|entity_type| . FILTER (" + \
            answer_filter + " && ?ans not in (pq:|old_property_id|))} LIMIT 10"

    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']

    # The entities are reversed and the (new) first one is repeated, since it is used for the last two properties
    entity_ids = questions_generator.get_elements_from_query(query, [0, 1])
    entity_ids.reverse()
    entity_ids.append(entity_ids[0])

    property_ids = questions_generator.get_specific_elements_from_query(query, [0], "p", "P")
    property_ids.extend(questions_generator.get_specific_elements_from_query(query, [0, 1], "pq", "P"))

    # Answers are inverted: the first bound value of the result row is answer 2, the next one is answer 1
    bound_values = iter(questions_generator.get_sparql_query_results(query)['results']['bindings'][0].values())
    answer_2 = next(bound_values)['value'].split("/")[-1]
    answer_1 = next(bound_values)['value'].split("/")[-1]

    answer_1_filter, answer_1_type = questions_generator.get_filter_from_element(answer_1, "obj", "s")
    answer_2_filter, answer_2_type = questions_generator.get_filter_from_element(answer_2, "obj", "s")

    entity_kind = questions_generator.ElementType.entity
    second_query = build_qualifier_query(answer_1, answer_1_filter, answer_1_type == entity_kind)
    third_query = build_qualifier_query(answer_2, answer_2_filter, answer_2_type == entity_kind)
    first_query = "select distinct ?ans where {wd:" + entity_ids[1] + \
        " ?ans ?s . ?s ?rel2 ?obj . ?obj wdt:|rel_entity_type| wd:|entity_type| . FILTER (REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (p:|old_property_id|))} LIMIT 20"

    # The second and third queries accept only "qualifier" properties, which are represented with the "pq" prefix
    found, old_properties = questions_generator.relation_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, property_ids,
        [first_query, second_query, third_query], ["", "pq", "pq"])
    if found:
        unknown_2_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so fall back to the "relation_2" function
    return unknown_2_relation_2_generation(current_uid, template_copy, generated_questions, property_ids, old_properties)
def right_subgraph_2_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a right-subgraph question that uses two properties.

    The function works on a deep copy of ``question_template``. It finds the entity bound to the
    first variable preceding a ``wdt:`` property in the copy's ``'sparql_wikidata'`` query, builds
    one candidate-entity query around it and passes that query to
    ``questions_generator.entity_3_generation_common_part``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``right_subgraph_2_nnqt_question_construction`` has been applied
        to it, or the result of ``right_subgraph_2_entity_2_generation`` (called with the original,
        uncopied template) when the common part reports that nothing was found.
    """
    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(query, [0])

    # Swap the selected variable with the first variable that precedes a "wdt:" property, then run
    # the rewritten query to learn which entity that variable is bound to
    select_var = re.findall(r'SELECT (\?\w*) WHERE', query, re.IGNORECASE)[0]
    bridge_var = re.findall(r'. (\?\w*) wdt:', query)[0]
    bridge_query = query.replace(select_var, bridge_var, 1)
    bridge_bindings = questions_generator.get_sparql_query_results(bridge_query)['results']['bindings']
    bridge_entity = bridge_bindings[0][bridge_var[1:]]['value'].split("/")[-1]
    bridge_filter, _ = questions_generator.get_filter_from_element(bridge_entity, "obj", "", False)

    # When a filter is available the element is matched through it, otherwise it is referenced directly as "wd:<id>"
    candidate_query = (
        "select ?ans ?ansLabel where {?ans ?rel ?obj . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . FILTER (" +
        bridge_filter + "LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"
        if bridge_filter else
        "select ?ans ?ansLabel where {?ans ?rel wd:" + bridge_entity +
        " . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"
    )

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, [candidate_query])
    if found:
        right_subgraph_2_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so try with a random entity of the same type or class
    return right_subgraph_2_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)
def statement_property_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a statement-property question.

    The function works on a deep copy of ``question_template``. It turns the copy's
    ``'sparql_wikidata'`` query into a graph pattern over ``?ans`` and builds a candidate-entity
    query that excludes, through ``NOT EXISTS``, the entities matching that pattern. When the
    original query does not contain the word "filter" (case-insensitive) a second candidate query
    is added. The queries are passed to ``questions_generator.entity_3_generation_common_part``.

    Args:
        current_uid: Forwarded unchanged to the helper generators.
        question_template: Source template; it must contain a ``'sparql_wikidata'`` key.
        generated_questions: Forwarded unchanged to the helper generators.

    Returns:
        The copied template after ``statement_property_nnqt_question_construction`` has been
        applied to it, or the result of ``statement_property_entity_2_generation`` (called with the
        original, uncopied template) when the common part reports that nothing was found.
    """
    template_copy = deepcopy(question_template)
    query = template_copy['sparql_wikidata']

    # Without a filter the last element of the query is an entity, so two entities are collected
    has_filter = 'filter' in query.lower()
    entity_positions = [0] if has_filter else [0, 1]
    entity_ids = questions_generator.get_elements_from_query(query, entity_positions)

    # Unlike the usual procedure, the answer query itself is generalized: the first known entity becomes "?ans", and
    # the pattern found between "{" and "}" is then used to exclude from the candidates the entities that return results
    generalized_query = query.replace("wd:" + entity_ids[0], "?ans")
    inner_pattern = re.findall("{(.+)}", generalized_query)[0]
    type_clause = "?ans wdt:|rel_entity_type| wd:|entity_type|"

    # NOTE: the original code kept, as a disabled string, a variant of the first query without the "?ans ?rel ?obj"
    # triple and the answer filter (and with LIMIT 20). The accompanying remark describes it as more complete and
    # better adapted to the "entity_3" logic, but slow and unsafe since it sometimes raises a server error

    # The original answer is the first bound value of the first result row
    answer_bindings = questions_generator.get_sparql_query_results(query)['results']['bindings']
    answer = next(iter(answer_bindings[0].values()))['value'].split("/")[-1]
    answer_filter, _ = questions_generator.get_filter_from_element(answer, "obj", "s", False)

    candidate_queries = ["".join([
        "SELECT distinct ?ans ?ansLabel WHERE {", type_clause,
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" && ",
        answer_filter,
        "NOT EXISTS {", type_clause, " . ", inner_pattern, "})} LIMIT 5",
    ])]
    if not has_filter:
        # An additional, ordinary query with two triples links the first known entity to the second one
        candidate_queries.append(
            "select ?ans ?ansLabel where {wd:" + entity_ids[0] + " ?rel ?s . ?s ?rel2 ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . " +
            "FILTER (LANG(?ansLabel) = \"en\" && REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (wd:|old_entity_id|))} LIMIT 20")

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, candidate_queries)
    if found:
        statement_property_nnqt_question_construction(template_copy)
        return template_copy
    # No valid candidates were found, so try with a random entity of the same type or class
    return statement_property_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)