Example #1
0
def run_dir(indirpath, outdirpath, answerfile, n):
    """
    takes a directory containing gzip files of informative articles
    and preprocesses them (if necessary) and performs QA on them

    Every entry of indirpath whose name ends in ".gz" is passed to
    tag_file_by_name and then to best_guess; each answer is followed by
    a blank line and the combined text is handed to output().  A running
    counter is printed once per processed file.

    params
    ----
    indirpath = directory containing gzip files
    outdirpath = directory that preprocessed files should be written to
                 (with or without a trailing path separator)
    answerfile = file path that results of QA should be output to
    n = number of answers desired per question
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import os

    qa = CategorizeQs()
    # NOTE(review): questions_dir is neither a parameter nor a local here;
    # presumably a module-level name -- confirm it is defined.
    dic = qa.get_qtypes(questions_dir)

    # dircache (used previously) is deprecated and gone in Python 3; it
    # returned os.listdir() sorted, so sorting here keeps the same order.
    infiles = sorted(os.listdir(indirpath))

    answers = []
    # NOTE(review): the progress counter starts at 201, presumably the
    # first question number of the data set -- confirm.
    count = 201
    for infile in infiles:
        # Test the extension, not a substring: the ".gz.pos" files written
        # below also contain ".gz" and must not be picked up as inputs.
        if not infile.endswith(".gz"):
            continue
        print(count)
        count += 1
        # os.path.join keeps the output inside outdirpath even when the
        # caller omits the trailing separator.
        outpath = os.path.join(outdirpath, infile + ".pos")
        # NOTE(review): the bare file name (not joined with indirpath) is
        # what gets passed on; verify that tag_file_by_name expects that.
        (gzfile, posfile, ner_file, qn) = tag_file_by_name(infile, outpath)
        answers.append(
            best_guess(n, ner_file, posfile, gzfile, qn, dic[str(qn)]) + "\n\n"
        )
    output(answerfile, "".join(answers))
Example #2
0
def run_file(file_num, n, qa):
    """
    answers a single question, identified by its number

    The question is tagged through tag_file_by_num (which receives the
    name "top_docs.<file_num>.pos" as its second argument) and the
    resulting files are handed to best_guess together with the entry
    of qa for this question.

    params
    ----
    file_num = number of question being answered
    n = number of answers desired
    qa = question type mapping, looked up with str(file_num)

    returns
    ----
    the result of best_guess for this question
    """
    num_key = str(file_num)
    pos_name = "top_docs." + num_key + ".pos"
    tagged = tag_file_by_num(file_num, pos_name)
    (posfile, gzip_file, ner_file) = tagged
    # The qa lookup stays after the tagging call, as in the original order.
    return best_guess(n, ner_file, posfile, gzip_file, file_num, qa[num_key])