Example #1
def generate_templates(label, project_name, depth=1, output_file="sentence_and_template_generator", paraphraser=False):
    """
    Wrapper for the whole generation pipeline: resolves the ontology URL for the given label,
    fetches its properties, and writes the generated question/SPARQL template pairs into the
    project folder.
    """
    val = generate_url(label)
    url = val[0]
    about = val[1]
    count = 0
    vessel = []
    depth = int(depth)
    diction = fetch_ranks("../utility/part-r-00000")
    if(not os.path.isdir(project_name)):
        os.makedirs(project_name)
    output_file = open(project_name+"/" + output_file, 'w')
    test_set = open(project_name+"/" + "test.csv", 'w')
    if paraphraser:
        expand_set = open(project_name+"/" + "expand.csv", 'w')
    prop_dic = {}
    for iterator in range(depth):
        prop_dic[iterator] = []
    # Create a logger object
    logger = logging.getLogger()

    # Configure logger
    logging.basicConfig(filename=project_name+"/logfile.log", format='%(filename)s: %(message)s', filemode='w')

    # Setting threshold level
    logger.setLevel(logging.INFO)

    # Use the logging methods
    #logger.debug("This is a debug message")  
    logger.info("This is a log file.")  
    #logger.warning("This is a warning message")  
    #logger.error("This is an error message")  
    #logger.critical("This is a critical message")
    if paraphraser:
        folder_path = get_pretrained_model(const.URL)
        set_seed(42)
        tokenizer, device, model = prepare_model(folder_path)

    list_of_property_information = get_properties(url=url, project_name=project_name, output_file="get_properties.csv")
    for property_line in list_of_property_information:
        count+=1
        prop = property_line.split(',')
        print("**************\n"+str(prop))
        if paraphraser:
            sentence_and_template_generator(original_count=depth, prop_dic=prop_dic, test_set=test_set, log=logger,
                                            diction=diction, output_file=output_file,
                                            mother_ontology=about.strip().replace("http://dbpedia.org/ontology/",
                                                                                  "dbo:"), vessel=vessel,
                                            project_name=project_name, prop=prop, suffix=" of <A> ?", count=depth,
                                            expand_set=expand_set, tokenizer=tokenizer, device=device, model=model)
        else:
            sentence_and_template_generator(original_count=depth, prop_dic=prop_dic, test_set=test_set, log=logger,
                                            diction=diction, output_file=output_file,
                                            mother_ontology=about.strip().replace("http://dbpedia.org/ontology/",
                                                                                  "dbo:"), vessel=vessel,
                                            project_name=project_name, prop=prop, suffix=" of <A> ?", count=depth)

    output_file.close()
    test_set.close()
    if paraphraser:
        expand_set.close()
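
A minimal invocation sketch, assuming the supporting modules (generate_url, fetch_ranks, get_properties, sentence_and_template_generator) are importable and that ../utility/part-r-00000 is present; the class label and project name below are purely illustrative:

# Hypothetical driver call: build templates two hops deep for the class label "Monument"
# and, because paraphraser=True, also write paraphrased questions to <project>/expand.csv.
generate_templates(label="Monument",
                   project_name="monument_templates",
                   depth=2,
                   paraphraser=True)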
Example #2
def generate_templates(label, project_name, depth=1, output_file="sentence_and_template_generator"):
    """
    Function to generate templates | wrapper around the rest of the functions.
    """
    val = generate_url(label)
    url = val[0]
    about = val[1]
    count = 0
    vessel = []
    
    diction = fetch_ranks("../utility/part-r-00000")
    if(not os.path.isdir(project_name)):
        os.makedirs(project_name)
    output_file = open(project_name+"/" + output_file, 'w')
    
    # Create a logger object
    logger = logging.getLogger()

    # Configure logger
    logging.basicConfig(filename=project_name+"/logfile.log", format='%(filename)s: %(message)s', filemode='w')

    # Setting threshold level
    logger.setLevel(logging.DEBUG)

    # Use the logging methods
    #logger.debug("This is a debug message")  
    logger.info("This is a log file.")  
    #logger.warning("This is a warning message")  
    #logger.error("This is an error message")  
    #logger.critical("This is a critical message")   

    list_of_property_information = get_properties(url=url, project_name=project_name, output_file="get_properties.csv")
    for property_line in list_of_property_information:
        count+=1
        prop = property_line.split(',')
        print("**************\n"+str(prop))
        sentence_and_template_generator(log=logger, diction=diction, output_file=output_file,
                                        mother_ontology=about.strip().replace("http://dbpedia.org/ontology/",
                                                                              "dbo:"), vessel=vessel,
                                        project_name=project_name, prop=prop, suffix=" of <A> ?", count=2)
    output_file.close()    
Example #3
def sentence_and_template_generator(prop_dic,
                                    test_set,
                                    log,
                                    mother_ontology,
                                    vessel,
                                    prop,
                                    project_name,
                                    output_file,
                                    diction,
                                    expand_set=None,
                                    tokenizer=None,
                                    device=None,
                                    model=None,
                                    original_count=0,
                                    count=0,
                                    suffix=" of <A> ?",
                                    query_suffix=""):

    if isinstance(prop, str):
        prop = prop.split(',')
    #original_count = count
    natural_language_question = []
    sparql_query = []
    expanded_nl_question = []
    expanded_sparql_query = []
    with open("../utility/question_form.csv", 'r') as question_form_file:
        question_form = question_form_file.readlines()
    question_starts_with = question_form[0].split(',')
    query_starts_with = question_form[1].split(',')
    query_ends_with = question_form[2].split(',')
    question_number = [2]
    if (prop[3] == "owl:Thing" or prop[3] == "xsd:string"):
        question_number = [2, 4]
    elif (prop[3] == "Place"):
        question_number = [3, 4]
    elif (prop[3] == "Person"):
        question_number = [1, 4]
    elif (prop[3] == "xsd:date" or "date" in prop[3] or "year" in prop[3]
          or "time" in prop[3]):
        question_number = [0, 4, 5]
    elif (prop[3] == "xsd:nonNegativeInteger" or "negative" in prop[3].lower()
          or prop[3] == "xsd:integer" or "integer" in prop[3].lower()):
        question_number = [2, 6]

    val = generate_url_spec(prop[0])
    prop_link = val[0]
    if (prop_link == "None" or prop_link is None):
        return
    derived = val[1]
    prop_link = "dbo:" + prop_link.strip().split(
        'http://dbpedia.org/ontology/')[-1]

    for number in question_number:
        original_question = question_starts_with[number] + prop[1] + suffix
        original_sparql = query_starts_with[
            number] + "where { <A>  " + query_suffix + prop_link + " ?x " + query_ends_with[
                number]
        natural_language_question.append(original_question)
        sparql_query.append(original_sparql)

    if (query_suffix == ""):
        query_answer = ("select distinct(?a) where { ?a " + prop_link +
                        " []  } ")
    else:
        query_answer = ("select distinct(?a) where { ?a " +
                        query_suffix.split(" ")[0] + " [] . ?a  " +
                        query_suffix + " " + prop_link + " ?x } ")

    # Both branches of the original if/else issued the same ASK query,
    # so the duplicate is collapsed into a single call.
    flag = check_query(log=log,
                       query=query_answer.replace("select distinct(?a)", "ask"))
    if (not flag):
        return

    rank = rank_check(diction=diction,
                      count=count,
                      query=query_answer,
                      original_count=original_count)

    count = count - 1
    if (count == 0):
        variable = "?x"
    else:
        variable = "?x" + str(count)
    query_suffix = prop_link + " " + variable + " . " + variable + " "
    #for temp_counter in range(original_count):
    if (prop[0] not in prop_dic[original_count - count - 1]):
        for number in range(len(natural_language_question)):
            if count == original_count - 1 and device:
                final_candidates = paraphrase_questions(
                    tokenizer, device, model, natural_language_question[number])
                final_question = pick_final_sentence(
                    natural_language_question[number], final_candidates)

                expanded_nl_question.append(final_question)
                expanded_sparql_query.append(sparql_query[number])
            if expanded_sparql_query:
                expand_line = [
                    mother_ontology, "", "", expanded_nl_question[number],
                    expanded_sparql_query[number], query_answer
                ]
                expand_set.write(
                    (';'.join(expand_line) + ";" + str(rank) + ";" +
                     "Paraphrased" + "\n").replace("  ", " "))
            vessel.append([
                mother_ontology, "", "", natural_language_question[number],
                sparql_query[number], query_answer
            ])
            output_file.write((';'.join(vessel[-1]) + ";" + str(rank) + ";" +
                               "Original" + "\n").replace("  ", " "))
            log.info(';'.join(vessel[-1]) + str(rank) + "\n")

    else:
        for number in range(len(natural_language_question)):
            if expanded_sparql_query:
                expand_line = [
                    mother_ontology, "", "", expanded_nl_question[number],
                    expanded_sparql_query[number], query_answer
                ]
                expand_set.write(
                    (';'.join(expand_line) + ";" + str(rank) + "\n").replace(
                        "  ", " "))
            vessel.append([
                mother_ontology, "", "", natural_language_question[number],
                sparql_query[number], query_answer
            ])
            test_set.write(
                (';'.join(vessel[-1]) + ";" + str(rank) + "\n").replace(
                    "  ", " "))
            print("++++++++++++++++++++", vessel[-1], "+++++++++++++++")
            log.info("Test: " + ';'.join(vessel[-1]) + str(rank) + "\n")

    prop_dic[original_count - count - 1].append(prop[0])
    #print(str(natural_language_question)+"\n"+str(sparql_query)+"\n"+query_answer+"\n*************")

    suffix = " of " + prop[1] + " of <A> ?"

    if (count > 0):
        print(prop[3].split(":")[-1])
        val = generate_url(prop[3].split(":")[-1])
        url = val[0]
        if (not url.startswith("http://mappings.dbpedia.org")):
            return
        list_of_property_information = get_properties(
            url=url, project_name=project_name, output_file=prop[1] + ".csv")
        for property_line in tqdm(list_of_property_information):
            prop_inside = property_line.split(',')
            sentence_and_template_generator(expand_set=expand_set,
                                            prop_dic=prop_dic,
                                            test_set=test_set,
                                            log=log,
                                            original_count=original_count,
                                            diction=diction,
                                            output_file=output_file,
                                            mother_ontology=mother_ontology,
                                            vessel=vessel,
                                            prop=prop_inside,
                                            suffix=suffix,
                                            count=count,
                                            project_name=project_name,
                                            query_suffix=query_suffix,
                                            tokenizer=tokenizer,
                                            device=device,
                                            model=model)
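
To make the string assembly above concrete, here is a small hedged trace of how query_answer and its ASK variant come out when query_suffix is empty (the property link dbo:author is an illustrative stand-in, not taken from the code):

# Illustrative trace of the query construction in sentence_and_template_generator.
prop_link = "dbo:author"          # made-up property link, purely for the example
query_suffix = ""
query_answer = "select distinct(?a) where { ?a " + prop_link + " []  } "
ask_query = query_answer.replace("select distinct(?a)", "ask")
print(query_answer)   # select distinct(?a) where { ?a dbo:author []  }
print(ask_query)      # ask where { ?a dbo:author []  }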
Example #4
    if (count == 0):
        variable = "?x"
    else:
        variable = "?x" + str(count)
    query_suffix = prop_link + " " + variable + " . " + variable + " "

    for number in range(len(natural_language_question)):
        vessel.append([mother_ontology, "", "", natural_language_question[number], sparql_query[number], query_answer])
        output_file.write((';'.join(vessel[-1]) + ";" + str(rank) + "\n").replace("  ", " "))
        log.info(';'.join(vessel[-1]) + str(rank) + "\n")
    # print(str(natural_language_question)+"\n"+str(sparql_query)+"\n"+query_answer+"\n*************")

    suffix = " of " + prop[1] + " of <A> ?"

    if (count > 0):
        print(prop[3].split(":")[-1])
        val = generate_url(prop[3].split(":")[-1])
        url = val[0]
        if (not url.startswith("http://mappings.dbpedia.org")):
            return
        list_of_property_information = get_properties(url=url, project_name=project_name, output_file=prop[1] + ".csv")
        for property_line in tqdm(list_of_property_information):
            prop_inside = property_line.split(',')
            sentence_and_template_generator(log=log, original_count=original_count, diction=diction,
                                            output_file=output_file, mother_ontology=mother_ontology, vessel=vessel,
                                            prop=prop_inside, suffix=suffix, count=count, project_name=project_name,
                                            query_suffix=query_suffix)


if __name__ == "__main__":
    """
    Section to parse the command line arguments.
Example #5
import sys
from zip_folder import zip_folder
from generate_url import generate_url

print('Zipping folder...', end="")
zip_folder(sys.argv[1])
print('Done')


print('Generating url...', end="")
url = generate_url(sys.argv[1])
print('Done')

print('\n')
print('Download url is:')
print('localhost:5000' + url)
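
Assuming this snippet is saved as a standalone script (its file name is not shown here; download.py is used only as an illustration), it would be run with the project folder name as its single command-line argument, e.g. python download.py Monument, which zips that folder and prints a download URL rooted at localhost:5000.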