def _load_jobs(idir, df):
    """Collect one job tuple per entry found under each category directory.

    For every category returned by ``data.get_cats()``, the directory
    ``<idir>/<category>`` is listed and each entry yields a tuple
    ``(cat, dname, dpath, df)``, where ``dname`` is the result of
    ``dfile2dname(entry)`` and ``dpath`` is the entry name joined onto the
    category directory.

    A category whose directory is missing, or is not a directory, is
    skipped with a warning on stderr instead of raising.

    Args:
        idir: Root input directory expected to hold one sub-directory per
            category.
        df: Value copied unchanged into every job tuple; it is not
            inspected here.

    Returns:
        A list of ``(cat, dname, dpath, df)`` tuples, grouped by category
        in ``data.get_cats()`` order and, within a category, in
        ``os.listdir`` order.
    """
    import sys

    jobs = []
    for cat in data.get_cats():
        catdir = os.path.join(idir, cat)
        # os.path.isdir() is already False for paths that do not exist, so
        # a single check covers both the "missing" and "not a dir" cases.
        if not os.path.isdir(catdir):
            # Include the offending path in the warning; the template was
            # previously written out unformatted as a literal '{}'.
            sys.stderr.write(
                u'{} does not exist or is not a directory.\n'.format(catdir))
            sys.stderr.flush()
            continue
        for disasterfile in os.listdir(catdir):
            dpath = os.path.join(catdir, disasterfile)
            dname = dfile2dname(disasterfile)
            jobs.append((cat, dname, dpath, df))
    return jobs
# Command-line interface: an optional output root and a flag controlling
# whether subsections are written to separate files.
parser = argparse.ArgumentParser()
parser.add_argument('--outputdir', help='directory to write text', type=str)
parser.add_argument('--separate-subsection-files', action='store_true')
args = parser.parse_args()
sep_sub_file = args.separate_subsection_files

# Fall back to the default output root when --outputdir was not given.
outdir = 'disaster_text' if args.outputdir is None else args.outputdir

# Make sure every category has its three text sub-directories before any
# output is written.
for cat in data.get_cats():
    abstract = os.path.join(outdir, cat, 'abstract_text')
    impact = os.path.join(outdir, cat, 'impact_text')
    history = os.path.join(outdir, cat, 'history_text')
    for section_dir in (abstract, impact, history):
        if not os.path.exists(section_dir):
            os.makedirs(section_dir)

print(u'Output root directory set to: {}'.format(unicode(outdir)))
import sys
# Worker pool with a fixed size of four.
pool = Pool(4)