Esempio n. 1
0
def two_intentions_right_subgraph_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a two-intentions right-subgraph question.

    Works on a deep copy of ``question_template``. The first two values of the
    first result binding of the template's ``sparql_wikidata`` query are taken
    as the old answers, and one candidate query is built per answer, looking
    for entities linked to that answer by any relation.

    Args:
        current_uid: Forwarded unchanged to the ``questions_generator`` helpers.
        question_template: Source template; must contain ``'sparql_wikidata'``.
        generated_questions: Forwarded unchanged to the helpers.

    Returns:
        The modified copy when ``entity_3_generation_common_part`` reports
        success, otherwise whatever
        ``two_intentions_right_subgraph_entity_2_generation`` returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(
        answer_query, [0, 1])

    first_binding = questions_generator.get_sparql_query_results(
        answer_query)['results']['bindings'][0]
    bound_values = iter(first_binding.values())

    # The |...| tokens are placeholders; presumably the common helper fills
    # them in before running the query -- TODO confirm.
    query_prefix = "select distinct ?ans ?ansLabel where {?ans ?rel wd:"
    query_suffix = (
        " . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . "
        "FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20")

    candidate_queries = []
    for _ in range(2):
        # Keep only the last path segment of the bound value (the entity id).
        answer_id = next(bound_values)['value'].split("/")[-1]
        candidate_queries.append(query_prefix + answer_id + query_suffix)

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids,
        candidate_queries)

    if found:
        two_intentions_right_subgraph_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same
    # type or class. Note the untouched original template is passed on.
    return two_intentions_right_subgraph_entity_2_generation(
        current_uid, question_template, generated_questions,
        entity_ids, old_entities)
Esempio n. 2
0
def unknown_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of an "unknown" question template.

    Unlike the usual procedure, the answer query itself is generalized: the
    known entity is replaced by ``?ans`` and the body between the braces is
    reused inside a ``NOT EXISTS`` clause, so that entities of the same type
    or class for which the original pattern matches are excluded from the
    candidates.

    Args:
        current_uid: Forwarded unchanged to the ``questions_generator`` helpers.
        question_template: Source template; must contain ``'sparql_wikidata'``.
        generated_questions: Forwarded unchanged to the helpers.

    Returns:
        The modified deep copy when a candidate was found, otherwise the
        result of ``unknown_entity_2_generation``.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(answer_query, [0])

    # Generalize the answer query, then keep only the text between "{" and "}".
    generalized_query = answer_query.replace("wd:" + entity_ids[0], "?ans")
    generalized_body = re.findall("{(.+)}", generalized_query)[0]
    type_pattern = "?ans wdt:|rel_entity_type| wd:|entity_type|"

    # Last path segment of the first value in the first result binding.
    first_binding = questions_generator.get_sparql_query_results(answer_query)['results']['bindings'][0]
    old_answer_id = next(iter(first_binding.values()))['value'].split("/")[-1]

    # Find answer filter and type (the second returned item is not needed here).
    answer_filter, _ = questions_generator.get_filter_from_element(old_answer_id, "obj", "s", False)

    # Author's note: this version of the query is more complete and adapted
    # for "entity_3" logic, but is also slow and unsafe, since it sometimes
    # raises a server error. A simpler disabled alternative omitted the
    # "?ans ?rel ?obj" triple and the answer filter and used LIMIT 20.
    filter_clause = "FILTER(LANG(?ansLabel) = \"en\" && " + answer_filter + \
        "NOT EXISTS {" + type_pattern + " . " + generalized_body + "})"
    query = "SELECT distinct ?ans ?ansLabel WHERE {" + type_pattern + \
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . " + filter_clause + "} LIMIT 5"

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, [query])

    if found:
        unknown_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return unknown_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)
def statement_property_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a statement-property question template.

    The answer query is generalized (first known entity replaced by ``?ans``)
    and reused inside a ``NOT EXISTS`` clause to exclude entities of the same
    type or class for which the original pattern matches. When the template
    query contains no filter, its last element is an entity too, so a second
    candidate query is added for it.

    Args:
        current_uid: Forwarded unchanged to the ``questions_generator`` helpers.
        question_template: Source template; must contain ``'sparql_wikidata'``.
        generated_questions: Forwarded unchanged to the helpers.

    Returns:
        The modified deep copy when a candidate was found, otherwise the
        result of ``statement_property_entity_2_generation``.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']

    # Without a filter the last element is an entity, so two elements are read.
    has_filter = 'filter' in answer_query.lower()
    element_positions = [0] if has_filter else [0, 1]
    entity_ids = questions_generator.get_elements_from_query(answer_query, element_positions)

    # Generalize the answer query, then keep only the text between "{" and "}".
    generalized_query = answer_query.replace("wd:" + entity_ids[0], "?ans")
    generalized_body = re.findall("{(.+)}", generalized_query)[0]
    type_pattern = "?ans wdt:|rel_entity_type| wd:|entity_type|"

    # Last path segment of the first value in the first result binding.
    first_binding = questions_generator.get_sparql_query_results(answer_query)['results']['bindings'][0]
    old_answer_id = next(iter(first_binding.values()))['value'].split("/")[-1]

    # Find answer filter and type (the second returned item is not needed here).
    answer_filter, _ = questions_generator.get_filter_from_element(old_answer_id, "obj", "s", False)

    # Author's note: this version of the query is more complete and adapted
    # for "entity_3" logic, but is also slow and unsafe, since it sometimes
    # raises a server error. A simpler disabled alternative omitted the
    # "?ans ?rel ?obj" triple and the answer filter and used LIMIT 20.
    filter_clause = "FILTER(LANG(?ansLabel) = \"en\" && " + answer_filter + \
        "NOT EXISTS {" + type_pattern + " . " + generalized_body + "})"
    exclusion_query = "SELECT distinct ?ans ?ansLabel WHERE {" + type_pattern + \
        " . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . " + filter_clause + "} LIMIT 5"

    candidate_queries = [exclusion_query]
    if not has_filter:
        # Additional normal query with two triples that link the first known
        # entity to the second one through an intermediate node ?s.
        candidate_queries.append(
            "select ?ans ?ansLabel where {wd:" + entity_ids[0] + " ?rel ?s . ?s ?rel2 ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . " + \
                "FILTER (LANG(?ansLabel) = \"en\" && REGEX(STR(?s), \"Q(\\\\d+)-\") && ?ans not in (wd:|old_entity_id|))} LIMIT 20")

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, candidate_queries)

    if found:
        statement_property_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return statement_property_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)
def right_subgraph_2_entity_3_generation(current_uid: int, question_template: Dict[str, Any], generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a right-subgraph-2 question template.

    The template query is rewritten to select the intermediate variable
    instead of the answer variable, and the value bound to it in the first
    result is used to look for candidates: either through a filter obtained
    from ``get_filter_from_element`` or, when that filter is empty, through a
    direct link to the intermediate entity.

    Args:
        current_uid: Forwarded unchanged to the ``questions_generator`` helpers.
        question_template: Source template; must contain ``'sparql_wikidata'``.
        generated_questions: Forwarded unchanged to the helpers.

    Returns:
        The modified deep copy when a candidate was found, otherwise the
        result of ``right_subgraph_2_entity_2_generation``.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(answer_query, [0])

    # Variable named in the SELECT clause, and the first variable that is
    # followed by a "wdt:" predicate.
    select_var = re.findall(r'SELECT (\?\w*) WHERE', answer_query, re.IGNORECASE)[0]
    middle_var = re.findall(r'. (\?\w*) wdt:', answer_query)[0]

    # Only the first occurrence of the selected variable is swapped.
    middle_query = answer_query.replace(select_var, middle_var, 1)
    first_binding = questions_generator.get_sparql_query_results(middle_query)['results']['bindings'][0]
    # Bindings are keyed by variable name without the leading "?".
    middle_entity_id = first_binding[middle_var[1:]]['value'].split("/")[-1]

    middle_filter, _ = questions_generator.get_filter_from_element(middle_entity_id, "obj", "", False)

    # Both query variants share everything except the object of "?ans ?rel"
    # and an optional extra condition at the start of the FILTER.
    if middle_filter:
        object_term = "?obj"
        extra_condition = middle_filter
    else:
        object_term = "wd:" + middle_entity_id
        extra_condition = ""
    query = "select ?ans ?ansLabel where {?ans ?rel " + object_term + \
        " . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . " + \
        "FILTER (" + extra_condition + "LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20"

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids, [query])

    if found:
        right_subgraph_2_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return right_subgraph_2_entity_2_generation(current_uid, question_template, generated_questions, entity_ids, old_entities)
def string_matching_simple_contains_word_entity_3_generation(
        current_uid: int, question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Generate an "entity_3" variant of a simple contains-word question.

    Works on a deep copy of ``question_template``. The first value of the
    first result binding of the template's ``sparql_wikidata`` query is taken
    as the old answer, and candidates are entities that the old answer links
    to through any relation.

    Args:
        current_uid: Forwarded unchanged to the ``questions_generator`` helpers.
        question_template: Source template; must contain ``'sparql_wikidata'``.
        generated_questions: Forwarded unchanged to the helpers.

    Returns:
        The modified copy when a candidate was found, otherwise the result of
        ``string_matching_simple_contains_word_entity_2_generation``.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    entity_ids = questions_generator.get_elements_from_query(answer_query, [0])

    # Last path segment of the first value in the first result binding.
    first_binding = questions_generator.get_sparql_query_results(
        answer_query)['results']['bindings'][0]
    old_answer_id = next(iter(first_binding.values()))['value'].split("/")[-1]

    # The |...| tokens are placeholders; presumably the common helper fills
    # them in before running the query -- TODO confirm.
    candidate_query = (
        "select ?ans ?ansLabel where {wd:" + old_answer_id +
        " ?rel ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . " +
        "FILTER (LANG(?ansLabel) = \"en\" && ?ans not in (wd:|old_entity_id|))} LIMIT 20")

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, entity_ids,
        [candidate_query])

    if found:
        string_matching_simple_contains_word_nnqt_question_construction(
            template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same
    # type or class. Note the untouched original template is passed on.
    return string_matching_simple_contains_word_entity_2_generation(
        current_uid, question_template, generated_questions,
        entity_ids, old_entities)