def run_dir(indirpath, outdirpath, answerfile, n):
    """
    Run question answering over every gzip file found in a directory.

    Each directory entry whose name contains ".gz" is tagged via
    tag_file_by_name and then answered via best_guess. The answers are
    concatenated, each followed by a blank line, and handed to output()
    in a single call once the whole directory has been processed.

    params
    ----
    indirpath = directory containing gzip files
    outdirpath = directory that preprocessed (".pos") files should be
                 written to; a trailing path separator is optional
    answerfile = file path that results of QA should be output to
    n = number of answers desired per question

    returns
    ----
    None; the combined answer text is passed to output(answerfile, ...)
    """
    # Imported locally so this function does not rely on the module's
    # import block already providing os.
    import os

    qa = CategorizeQs()
    # questions_dir is a module-level name defined outside this function.
    qtypes = qa.get_qtypes(questions_dir)

    # dircache.listdir returned a sorted listing; dircache was deprecated in
    # Python 2.6 and removed in Python 3, so sort os.listdir's result instead.
    entries = sorted(os.listdir(indirpath))

    answers = []
    # Progress counter echoed to stdout. Presumably 201 is the number of the
    # first question in the collection -- TODO confirm.
    count = 201
    for infile in entries:
        # NOTE(review): a substring test also matches names such as
        # "x.gz.pos"; kept as-is to preserve existing behavior.
        if ".gz" not in infile:
            continue

        # print(x) with one argument prints the same text on Python 2 and 3.
        print(count)
        count += 1

        # os.path.join gives the same path as plain concatenation when
        # outdirpath ends with a separator, and a correct one when it does not.
        outpath = os.path.join(outdirpath, infile + ".pos")

        # NOTE(review): the bare entry name (not joined with indirpath) is
        # passed here; presumably the helper resolves it -- confirm.
        (gzfile, posfile, ner_file, qn) = tag_file_by_name(infile, outpath)
        guess = best_guess(n, ner_file, posfile, gzfile, qn, qtypes[str(qn)])
        answers.append(guess + "\n\n")

    # Join once at the end rather than growing a string inside the loop.
    output(answerfile, "".join(answers))
def run_file(file_num, n, qa):
    """
    Answer a single question identified by its number.

    The question's documents are tagged via tag_file_by_num, writing to
    "top_docs.<file_num>.pos", and the tagged files are then passed to
    best_guess together with the question type looked up in qa.

    params
    ----
    file_num = number of question being answered
    n = number of answers desired
    qa = question type mapping, indexed by str(file_num)

    returns
    ----
    whatever best_guess returns for this question
    """
    question_key = str(file_num)
    pos_path = "top_docs." + question_key + ".pos"

    # tag_file_by_num yields exactly three values, in this order.
    tagged = tag_file_by_num(file_num, pos_path)
    posfile, gzip_file, ner_file = tagged

    return best_guess(
        n, ner_file, posfile, gzip_file, file_num, qa[question_key]
    )