def _question_form_indices(range_type):
    """Choose which question-form indices to use for a property type string.

    The returned integers index the comma-separated lists read from
    ``question_form.csv`` (question starts, query starts, query ends).
    Substring checks are case-insensitive so that values such as
    ``xsd:gYear`` are handled like the other date/year/time types.
    """
    lowered = range_type.lower()
    if range_type in ("owl:Thing", "xsd:string"):
        return [2, 4]
    if range_type == "Place":
        return [3, 4]
    if range_type == "Person":
        return [1, 4]
    if any(marker in lowered for marker in ("date", "year", "time")):
        return [0, 4, 5]
    # Covers both "xsd:nonNegativeInteger" and "xsd:integer".
    if "negative" in lowered or "integer" in lowered:
        return [2, 6]
    return [2]


def sentence_and_template_generator(prop_dic,
                                    test_set,
                                    log,
                                    mother_ontology,
                                    vessel,
                                    prop,
                                    project_name,
                                    output_file,
                                    diction,
                                    expand_set=None,
                                    tokenizer=None,
                                    device=None,
                                    model=None,
                                    original_count=0,
                                    count=0,
                                    suffix=" of <A> ?",
                                    query_suffix=""):
    """Generate question/SPARQL template rows for one property and recurse.

    For the given property this builds one natural-language question and one
    SPARQL template per selected question form, checks via ``check_query``
    that the corresponding ``ask`` query succeeds, ranks the query with
    ``rank_check`` and writes ``;``-separated rows to the output files. While
    the decremented ``count`` is still positive it then fetches the
    properties of the property's type (``prop[3]``) and calls itself for each
    of them, extending ``suffix`` and ``query_suffix`` by one hop.

    Args:
        prop_dic: Indexable collection of lists, indexed by
            ``original_count - count - 1``; each list holds the ``prop[0]``
            values already emitted at that level. Mutated in place.
        test_set: Writable sink that receives rows for properties that were
            already present in ``prop_dic`` at this level.
        log: Logger-like object (``info`` is called; also passed to
            ``check_query``).
        mother_ontology: String written as the first column of every row.
        vessel: List that every generated row is appended to.
        prop: Property record, either a comma-separated string or a list.
            ``prop[0]`` is resolved with ``generate_url_spec``, ``prop[1]``
            is inserted into the question text and ``prop[3]`` is the type
            string used to select question forms and to recurse.
        project_name: Forwarded to ``get_properties``.
        output_file: Writable sink for rows tagged ``Original``.
        diction: Forwarded to ``rank_check``.
        expand_set: Writable sink for rows tagged ``Paraphrased``. When it is
            ``None`` no paraphrases are generated.
        tokenizer, device, model: Forwarded to ``paraphrase_questions``;
            paraphrasing only happens when ``device`` is truthy.
        original_count: The ``count`` value of the outermost call.
        count: Remaining recursion depth, including this call.
        suffix: Text appended after ``prop[1]`` in each question.
        query_suffix: Triple-pattern prefix accumulated by enclosing calls.

    Returns:
        None. Results are delivered through ``vessel``, ``prop_dic`` and the
        writable sinks.
    """
    if isinstance(prop, str):
        prop = prop.split(',')

    # Close the template file deterministically; this function recurses and
    # would otherwise leave one open handle per call to the garbage collector.
    with open("../utility/question_form.csv", 'r') as form_file:
        question_form = form_file.readlines()
    question_starts_with = question_form[0].split(',')
    query_starts_with = question_form[1].split(',')
    query_ends_with = question_form[2].split(',')

    question_number = _question_form_indices(prop[3])

    prop_link = generate_url_spec(prop[0])[0]
    if prop_link is None or prop_link == "None":
        return
    prop_link = "dbo:" + prop_link.strip().split(
        'http://dbpedia.org/ontology/')[-1]

    natural_language_question = []
    sparql_query = []
    for number in question_number:
        natural_language_question.append(
            question_starts_with[number] + prop[1] + suffix)
        sparql_query.append(
            query_starts_with[number] + "where { <A> " + query_suffix +
            prop_link + " ?x " + query_ends_with[number])

    if query_suffix == "":
        query_answer = ("select distinct(?a) where { ?a " + prop_link +
                        " [] } ")
    else:
        query_answer = ("select distinct(?a) where { ?a " +
                        query_suffix.split(" ")[0] + " [] . ?a " +
                        query_suffix + " " + prop_link + " ?x } ")

    # Give up on this property when the ASK form of the query fails the check.
    if not check_query(log=log,
                       query=query_answer.replace("select distinct(?a)",
                                                  "ask")):
        return

    rank = rank_check(diction=diction,
                      count=count,
                      query=query_answer,
                      original_count=original_count)

    count = count - 1
    variable = "?x" if count == 0 else "?x" + str(count)
    query_suffix = prop_link + " " + variable + " . " + variable + " "

    level = original_count - count - 1
    if prop[0] not in prop_dic[level]:
        # Paraphrases are only produced for the outermost call, and only when
        # a device and a sink to write them to are both available.
        paraphrase = (count == original_count - 1 and device
                      and expand_set is not None)
        for question, query in zip(natural_language_question, sparql_query):
            if paraphrase:
                # Paraphrase the question of *this* row; pairing every row
                # with the last generated template would duplicate it.
                final_candidates = paraphrase_questions(
                    tokenizer, device, model, question)
                final_question = pick_final_sentence(question,
                                                     final_candidates)
                expand_line = [
                    mother_ontology, "", "", final_question, query,
                    query_answer
                ]
                # Concatenation can leave doubled spaces; collapse them.
                expand_set.write(
                    (';'.join(expand_line) + ";" + str(rank) + ";" +
                     "Paraphrased" + "\n").replace("  ", " "))
            row = [mother_ontology, "", "", question, query, query_answer]
            vessel.append(row)
            output_file.write((';'.join(row) + ";" + str(rank) + ";" +
                               "Original" + "\n").replace("  ", " "))
            log.info(';'.join(row) + str(rank) + "\n")
    else:
        # The property was already emitted at this level: route the rows to
        # the test set instead of the main output.
        for question, query in zip(natural_language_question, sparql_query):
            row = [mother_ontology, "", "", question, query, query_answer]
            vessel.append(row)
            test_set.write(
                (';'.join(row) + ";" + str(rank) + "\n").replace("  ", " "))
            print("++++++++++++++++++++", row, "+++++++++++++++")
            log.info("Test: " + ';'.join(row) + str(rank) + "\n")

    prop_dic[level].append(prop[0])

    suffix = " of " + prop[1] + " of <A> ?"

    if count > 0:
        range_name = prop[3].split(":")[-1]
        print(range_name)
        url = generate_url(range_name)[0]
        if not url.startswith("http://mappings.dbpedia.org"):
            return
        list_of_property_information = get_properties(
            url=url, project_name=project_name, output_file=prop[1] + ".csv")
        for property_line in tqdm(list_of_property_information):
            sentence_and_template_generator(expand_set=expand_set,
                                            prop_dic=prop_dic,
                                            test_set=test_set,
                                            log=log,
                                            original_count=original_count,
                                            diction=diction,
                                            output_file=output_file,
                                            mother_ontology=mother_ontology,
                                            vessel=vessel,
                                            prop=property_line.split(','),
                                            suffix=suffix,
                                            count=count,
                                            project_name=project_name,
                                            query_suffix=query_suffix,
                                            tokenizer=tokenizer,
                                            device=device,
                                            model=model)
question_number = [2, 4] elif (prop[3] == "Place"): question_number = [3, 4] elif (prop[3] == "Person"): question_number = [1, 4] elif (prop[3] == "xsd:date" or "date" in prop[3] or "year" in prop[3].lower() or "date" in prop[ 3].lower() or "time" in prop[3].lower()): question_number = [0, 4, 5] elif (prop[3] == "xsd:nonNegativeInteger" or "negative" in prop[3].lower()): question_number = [2, 6] elif (prop[3] == "xsd:integer" or "integer" in prop[3].lower()): question_number = [2, 6] else: question_number = [2] val = (generate_url_spec(prop[0])) prop_link = val[0] if (prop_link == "None" or prop_link == None): return derived = val[1] prop_link = "dbo:" + prop_link.strip().split('http://dbpedia.org/ontology/')[-1] for number in question_number: natural_language_question.append(question_starts_with[number] + prop[1] + suffix) sparql_query.append( query_starts_with[number] + "where { <A> " + query_suffix + prop_link + " ?x " + query_ends_with[number]) if (query_suffix == ""): query_answer = ("select distinct(?a) where { ?a " + prop_link + " [] } ") else: query_answer = ("select distinct(?a) where { ?a " + query_suffix.split(" ")[