def main():
    """Command-line entry point of morpho_tagger.py.

    Parses the command line, hands the options to ``config.generate_config``,
    registers the work items through ``task_list`` and then runs ``convert``
    on them: in a ``multiprocessing.Pool`` when
    ``config.CONFIG['jobs_number']`` is greater than 1, otherwise one after
    another in the current process.

    If ``--input`` or ``--output`` is missing, the help text is printed and
    the process exits with status 0.

    Returns:
        The number of tasks for which ``convert`` returned a value other
        than ``None``. Both execution modes report the same quantity, so the
        result no longer depends on the configured number of jobs.
    """
    usage_string = (
        'Usage: morpho_tagger.py '
        '--input <input path> '
        '--output <output path> [options]'
    )
    parser = configure_option_parser(usage_string)
    (options, args) = parser.parse_args()
    config.generate_config(options)

    if not options.input or not options.output:
        parser.print_help()
        exit(0)

    inpath = os.path.abspath(options.input)
    outpath = os.path.abspath(options.output)
    jobs_number = config.CONFIG['jobs_number']

    # A directory is walked and every discovered pair is registered through
    # the add_task callback; a single file is registered directly.
    if os.path.isdir(inpath):
        fs_walk.process_directory(inpath, outpath, task_list.add_task)
    else:
        task_list.add_task(inpath, outpath)

    if 1 < jobs_number:
        # Each worker process runs initialize_lemmers(options) on start-up.
        # NOTE(review): the pool is not closed here - presumably
        # task_list.execute_tasks takes care of that; confirm.
        worker_pool = multiprocessing.Pool(
            processes=jobs_number,
            initializer=initialize_lemmers,
            initargs=[options])
        return_codes = task_list.execute_tasks(convert, worker_pool)
    else:
        initialize_lemmers(options)
        # Every task is executed even if an earlier one reported something.
        return_codes = [convert(paths_pair) for paths_pair in task_list.TASKS]

    # Previously the sequential branch AND-ed booleans together (starting
    # from True), which disagreed with the count produced by the parallel
    # branch; both now use the same count of non-None results.
    return sum(1 for code in return_codes if code is not None)
def main():
    """Command-line entry point of tokenizer.py.

    Builds the option parser, passes the parsed options to
    ``config.generate_config`` and processes the input path.

    For a directory input, tasks are registered through
    ``fs_walk.process_directory`` with ``task_list.add_task`` as the
    callback, executed with ``task_list.execute_tasks`` and the number of
    results different from 0 is returned. For any other input the result of
    a single ``convert_and_log`` call is returned unchanged.

    If ``--input`` or ``--output`` is missing, the help text is printed and
    the process exits with status 0.
    """
    option_parser = optparse.OptionParser(
        usage='Usage: tokenizer.py --input <input path> --output <output path>')
    option_parser.add_option('--input',
                             dest='input',
                             help='input path - directory or file')
    option_parser.add_option('--output',
                             dest='output',
                             help='output path - directory or file')
    option_parser.add_option('--output_encoding',
                             dest='out_encoding',
                             help='encoding of the output files',
                             default='cp1251')
    option_parser.add_option('--jobs',
                             dest='jobs_number',
                             help='concurrent jobs number',
                             default='1')

    options, _positional = option_parser.parse_args()
    config.generate_config(options)

    # Both paths are mandatory.
    if not (options.input and options.output):
        option_parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    # Single input: its return code is the overall result.
    if not os.path.isdir(source_path):
        return convert_and_log((source_path, target_path))

    print('Collecting tasks...')
    fs_walk.process_directory(source_path, target_path, task_list.add_task)
    print('Starting processing...')

    # Count the tasks that reported a non-zero code.
    nonzero_count = 0
    for code in task_list.execute_tasks(convert_and_log):
        if code != 0:
            nonzero_count += 1
    return nonzero_count
def main():
    """Command-line entry point of tokenizer.py.

    Parses the command line, hands the options to ``config.generate_config``,
    registers the work items through ``task_list`` and runs
    ``convert_and_log`` on them: through ``task_list.execute_tasks`` when
    ``config.CONFIG['jobs_number']`` is greater than 1, otherwise one after
    another in the current process.

    If ``--input`` or ``--output`` is missing, the help text is printed and
    the process exits with status 0.

    Returns:
        The number of tasks whose return code differs from 0, in both
        execution modes.
    """
    usage_string = 'Usage: tokenizer.py --input <input path> --output <output path>'
    parser = optparse.OptionParser(usage=usage_string)
    parser.add_option('--input',
                      dest='input',
                      help='input path - directory or file')
    parser.add_option('--output',
                      dest='output',
                      help='output path - directory or file')
    parser.add_option('--output_encoding',
                      dest='out_encoding',
                      help='encoding of the output files',
                      default='cp1251')
    parser.add_option('--jobs',
                      dest='jobs_number',
                      help='concurrent jobs number',
                      default=1,
                      type='int')

    (options, args) = parser.parse_args()
    config.generate_config(options)

    if not options.input or not options.output:
        parser.print_help()
        exit(0)

    inpath = os.path.abspath(options.input)
    outpath = os.path.abspath(options.output)

    if os.path.isdir(inpath):
        print('Collecting tasks...')
        fs_walk.process_directory(inpath, outpath, task_list.add_task)
    else:
        # The old call, TASKS.append(inpath, outpath), handed two arguments
        # to append() and failed with TypeError for single-file input.
        # Register the pair through the task_list API instead.
        task_list.add_task(inpath, outpath)

    jobs_number = config.CONFIG['jobs_number']
    print('Starting processing...')

    if 1 < jobs_number:
        child_retcodes = task_list.execute_tasks(convert_and_log)
    else:
        # Run every task, even after a failure, and keep each return code.
        child_retcodes = [convert_and_log(paths_pair)
                          for paths_pair in task_list.TASKS]

    # The sequential branch used to fold codes with `True & code`, which
    # dropped failures as soon as one task returned 0 (and for even codes);
    # count the non-zero codes exactly as the parallel branch does.
    return sum(1 for code in child_retcodes if code != 0)
def main():
    """Command-line entry point of annotate_texts.py.

    Reuses the option parser and lemmer initialisation of ``morpho_tagger``,
    passes the parsed options to ``config.generate_config`` and runs
    ``convert`` on the input.

    For a directory input, tasks are registered through
    ``fs_walk.process_directory`` with ``task_list.add_task`` as the
    callback, executed with ``task_list.execute_tasks`` and the number of
    non-None results is returned. For any other input the return value is
    the boolean ``convert(...) is not None``.

    If ``--input`` or ``--output`` is missing, the help text is printed and
    the process exits with status 0.
    """
    usage = 'Usage: annotate_texts.py --input <input path> --output <output path> [options]'
    opt_parser = morpho_tagger.configure_option_parser(usage)
    options, _positional = opt_parser.parse_args()
    config.generate_config(options)

    # Both paths are mandatory.
    if not (options.input and options.output):
        opt_parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    morpho_tagger.initialize_lemmers(options)

    # Single input: report whether convert produced a non-None result.
    if not os.path.isdir(source_path):
        outcome = convert((source_path, target_path))
        return outcome is not None

    fs_walk.process_directory(source_path, target_path, task_list.add_task)
    outcomes = task_list.execute_tasks(convert)
    return len([code for code in outcomes if code is not None])