Exemple #1
0
def main():
    """Entry point of the morpho_tagger.py command-line tool.

    Parses the command line, builds the configuration from the parsed
    options and queues the input (a single file, or a directory handed to
    ``fs_walk.process_directory``) as tasks. When more than one job is
    configured the tasks are executed through a ``multiprocessing.Pool``
    whose workers are initialised with ``initialize_lemmers``; otherwise
    every task is converted in the current process.

    Prints the help text and exits with status 0 when ``--input`` or
    ``--output`` is missing.

    Returns:
        With a pool: the number of tasks whose result is not ``None``.
        Without a pool: ``True`` only if every ``convert`` call returned
        a non-``None`` value (``True`` for an empty task list).
        NOTE(review): the two modes aggregate results differently -
        confirm which of them is the intended contract.
    """
    usage_string = (
        'Usage: morpho_tagger.py '
        '--input <input path> '
        '--output <output path> [options]'
    )
    parser = configure_option_parser(usage_string)
    options, _ = parser.parse_args()

    config.generate_config(options)
    if not (options.input and options.output):
        parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    jobs_number = config.CONFIG['jobs_number']

    # Queue the work: a directory is delegated to fs_walk, a file is one task.
    if not os.path.isdir(source_path):
        task_list.add_task(source_path, target_path)
    else:
        fs_walk.process_directory(source_path, target_path, task_list.add_task)

    if jobs_number <= 1:
        # In-process mode: every task is run, none is skipped on a None result.
        initialize_lemmers(options)
        outcomes = [convert(pair) is not None for pair in task_list.TASKS]
        return all(outcomes)

    # Pool mode: each worker process initialises its own lemmers.
    worker_pool = multiprocessing.Pool(
        processes=jobs_number,
        initializer=initialize_lemmers,
        initargs=[options],
    )
    return_codes = task_list.execute_tasks(convert, worker_pool)
    return sum(1 for code in return_codes if code is not None)
def main():
    """Entry point of the morpho_tagger.py command-line tool.

    Parses the command line, builds the configuration from the parsed
    options and queues the input (a single file, or a directory handed to
    ``fs_walk.process_directory``) as tasks. When more than one job is
    configured the tasks are executed through a ``multiprocessing.Pool``
    whose workers are initialised with ``initialize_lemmers``; otherwise
    every task is converted in the current process.

    Prints the help text and exits with status 0 when ``--input`` or
    ``--output`` is missing.

    Returns:
        With a pool: the number of tasks whose result is not ``None``.
        Without a pool: ``True`` only if every ``convert`` call returned
        a non-``None`` value (``True`` for an empty task list).
        NOTE(review): the two modes aggregate results differently -
        confirm which of them is the intended contract.
    """
    usage_string = (
        'Usage: morpho_tagger.py '
        '--input <input path> '
        '--output <output path> [options]'
    )
    parser = configure_option_parser(usage_string)
    options, _ = parser.parse_args()

    config.generate_config(options)
    if not (options.input and options.output):
        parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    jobs_number = config.CONFIG['jobs_number']

    # Queue the work: a directory is delegated to fs_walk, a file is one task.
    if not os.path.isdir(source_path):
        task_list.add_task(source_path, target_path)
    else:
        fs_walk.process_directory(source_path, target_path, task_list.add_task)

    if jobs_number <= 1:
        # In-process mode: every task is run, none is skipped on a None result.
        initialize_lemmers(options)
        outcomes = [convert(pair) is not None for pair in task_list.TASKS]
        return all(outcomes)

    # Pool mode: each worker process initialises its own lemmers.
    worker_pool = multiprocessing.Pool(
        processes=jobs_number,
        initializer=initialize_lemmers,
        initargs=[options],
    )
    return_codes = task_list.execute_tasks(convert, worker_pool)
    return sum(1 for code in return_codes if code is not None)
def main():
    """Entry point of the tokenizer.py command-line tool.

    Declares the ``--input``, ``--output``, ``--output_encoding`` and
    ``--jobs`` options, builds the configuration from the parsed values and
    processes the input path. Prints the help text and exits with status 0
    when ``--input`` or ``--output`` is missing.

    Returns:
        For a directory input: the number of tasks whose return code is
        not 0. For any other input: whatever ``convert_and_log`` returns
        for the single (input, output) pair.
    """
    parser = optparse.OptionParser(
        usage='Usage: tokenizer.py --input <input path> --output <output path>')
    parser.add_option('--input',
                      dest='input',
                      help='input path - directory or file')
    parser.add_option('--output',
                      dest='output',
                      help='output path - directory or file')
    parser.add_option('--output_encoding',
                      dest='out_encoding',
                      help='encoding of the output files',
                      default='cp1251')
    parser.add_option('--jobs',
                      dest='jobs_number',
                      help='concurrent jobs number',
                      default='1')

    options, _ = parser.parse_args()
    config.generate_config(options)
    if not (options.input and options.output):
        parser.print_help()
        exit(0)
    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    # A single file is converted directly, without going through the task list.
    if not os.path.isdir(source_path):
        return convert_and_log((source_path, target_path))

    print('Collecting tasks...')
    fs_walk.process_directory(source_path, target_path, task_list.add_task)
    print('Starting processing...')
    child_retcodes = task_list.execute_tasks(convert_and_log)
    return sum(1 for code in child_retcodes if code != 0)
Exemple #4
0
def main():
    """Entry point of the tokenizer.py command-line tool.

    Declares the ``--input``, ``--output``, ``--output_encoding`` and
    ``--jobs`` options, builds the configuration from the parsed values,
    registers the input (a single file, or a directory handed to
    ``fs_walk.process_directory``) as tasks and runs ``convert_and_log``
    over them. Prints the help text and exits with status 0 when
    ``--input`` or ``--output`` is missing.

    Returns:
        The number of tasks whose return code is not 0, in both the
        multi-job and the single-job mode.
    """
    usage_string = 'Usage: tokenizer.py --input <input path> --output <output path>'
    parser = optparse.OptionParser(usage=usage_string)
    parser.add_option('--input',
                      dest='input',
                      help='input path - directory or file')
    parser.add_option('--output',
                      dest='output',
                      help='output path - directory or file')
    parser.add_option('--output_encoding',
                      dest='out_encoding',
                      help='encoding of the output files',
                      default='cp1251')
    parser.add_option('--jobs',
                      dest='jobs_number',
                      help='concurrent jobs number',
                      default=1,
                      type='int')

    (options, args) = parser.parse_args()
    config.generate_config(options)
    if not options.input or not options.output:
        parser.print_help()
        exit(0)
    inpath = os.path.abspath(options.input)
    outpath = os.path.abspath(options.output)

    if os.path.isdir(inpath):
        print('Collecting tasks...')
        fs_walk.process_directory(inpath, outpath, task_list.add_task)
    else:
        # Register the single file through the same callback the directory
        # walk uses; calling append() with two arguments raises TypeError.
        task_list.add_task(inpath, outpath)

    jobs_number = config.CONFIG['jobs_number']
    print('Starting processing...')
    if 1 < jobs_number:
        child_retcodes = task_list.execute_tasks(convert_and_log)
    else:
        # Run every task in this process and keep its raw return code so
        # both modes are aggregated in exactly the same way below.
        child_retcodes = [convert_and_log(paths_pair)
                          for paths_pair in task_list.TASKS]

    # Count non-zero codes. Folding them with `True & code` would report 0
    # for any even code and whenever at least one task returned 0.
    return sum(1 for code in child_retcodes if code != 0)
Exemple #5
0
def main():
    """Entry point of the annotate_texts.py command-line tool.

    Reuses the option parser and lemmer initialisation of ``morpho_tagger``,
    builds the configuration from the parsed options and converts the input
    path. Prints the help text and exits with status 0 when ``--input`` or
    ``--output`` is missing.

    Returns:
        For a directory input: the number of tasks whose result is not
        ``None``. For any other input: ``True`` if ``convert`` returned a
        non-``None`` value for the single (input, output) pair, else
        ``False``.
    """
    parser = morpho_tagger.configure_option_parser(
        'Usage: annotate_texts.py --input <input path> --output <output path> [options]')
    options, _ = parser.parse_args()

    config.generate_config(options)
    if not (options.input and options.output):
        parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    morpho_tagger.initialize_lemmers(options)

    # A single file bypasses the task list entirely.
    if not os.path.isdir(source_path):
        return convert((source_path, target_path)) is not None

    fs_walk.process_directory(source_path, target_path, task_list.add_task)
    return_codes = task_list.execute_tasks(convert)
    return sum(1 for code in return_codes if code is not None)
def main():
    """Entry point of the annotate_texts.py command-line tool.

    Reuses the option parser and lemmer initialisation of ``morpho_tagger``,
    builds the configuration from the parsed options and converts the input
    path. Prints the help text and exits with status 0 when ``--input`` or
    ``--output`` is missing.

    Returns:
        For a directory input: the number of tasks whose result is not
        ``None``. For any other input: ``True`` if ``convert`` returned a
        non-``None`` value for the single (input, output) pair, else
        ``False``.
    """
    parser = morpho_tagger.configure_option_parser(
        'Usage: annotate_texts.py --input <input path> --output <output path> [options]')
    options, _ = parser.parse_args()

    config.generate_config(options)
    if not (options.input and options.output):
        parser.print_help()
        exit(0)

    source_path = os.path.abspath(options.input)
    target_path = os.path.abspath(options.output)

    morpho_tagger.initialize_lemmers(options)

    # A single file bypasses the task list entirely.
    if not os.path.isdir(source_path):
        return convert((source_path, target_path)) is not None

    fs_walk.process_directory(source_path, target_path, task_list.add_task)
    return_codes = task_list.execute_tasks(convert)
    return sum(1 for code in return_codes if code is not None)