def run_dir(indirpath, outdirpath, answerfile, n):
    """
    Run QA over every gzip file in a directory and write out the answers.

    Each entry of indirpath whose name ends in ".gz" is handed to
    tag_file_by_name for preprocessing, the files it returns are passed to
    best_guess, and all guesses (each followed by a blank line) are written
    to answerfile with a single output() call.

    params
    ----
    indirpath = directory containing gzip files
    outdirpath = directory that preprocessed files should be written to.
        It is concatenated directly with the file name, so it needs a
        trailing path separator to name a directory.
    answerfile = file path that results of QA should be output to
    n = number of answers desired per question
    """
    # Local import keeps this function self-contained.
    import os

    qa = CategorizeQs()
    dic = qa.get_qtypes(questions_dir)

    # sorted(os.listdir(...)) yields the same sorted listing that the
    # deprecated dircache.listdir produced (dircache was removed in Python 3).
    infiles = sorted(os.listdir(indirpath))

    answers = []
    # Progress counter echoed to stdout; starts at 201 -- presumably the
    # first question number, TODO confirm.
    count = 201
    for infile in infiles:
        # Match the extension, not a substring: a plain `".gz" in infile`
        # would also pick up derived files such as "<name>.gz.pos", which
        # this function itself names below.
        if not infile.endswith(".gz"):
            continue
        print(count)
        count += 1
        outpath = outdirpath + infile + ".pos"
        # NOTE(review): tag_file_by_name receives the bare file name, not a
        # path under indirpath -- confirm the helper resolves it itself.
        (gzfile, posfile, ner_file, qn) = tag_file_by_name(infile, outpath)
        answers.append(
            best_guess(n, ner_file, posfile, gzfile, qn, dic[str(qn)]) + "\n\n"
        )

    # Join once instead of growing a string inside the loop.
    output(answerfile, "".join(answers))
def run_files_in_range(start, stop, outfile, n):
    """
    Call run_file for every question number in [start, stop] and write the
    combined results to outfile.

    Note: to process a single question, pass the same number for start
    and stop.

    params
    ----
    start = the first question to be processed
    stop = the last question to be processed
        Note: the range is inclusive - start, stop and every number in
        between are processed
    outfile = path to the file that results will be written to
    n = number of top guesses per question
    """
    categorizer = CategorizeQs()
    qtypes = categorizer.get_qtypes(questions_dir)

    # One result per question, each followed by a blank line.
    combined = "".join(
        run_file(question, n, qtypes) + "\n\n"
        for question in range(start, stop + 1)
    )
    output(outfile, combined)