예제 #1
0
if __name__ == '__main__':
    # Command-line entry point: parse the options, build a Tokenizer and run
    # it over the input through process_lines().
    #
    # Options (a trailing ':' in the getopt string means "takes an argument"):
    #   -h      show the usage text and exit
    #   -l      sets options['lowercase']
    #   -e ENC  encoding handed to process_lines() (default DEFAULT_ENCODING)
    #   -m      sets options['moses_escape']
    #   -f N    integer passed as `factor` to tok.tokenize_factored_text()
    #   -t N    integer stored in options['num_t']
    try:
        # '-f' has to be declared as 'f:'. Without the colon getopt returns
        # an empty argument for it and int('') below raises ValueError.
        opts, filenames = getopt.getopt(sys.argv[1:], 'hle:mf:t:')
    except getopt.GetoptError as err:
        # Unknown option or missing argument: report it and show the usage
        # text instead of terminating with a traceback.
        sys.stderr.write('%s\n' % err)
        display_usage()
        sys.exit(1)
    options = {}
    show_help = False  # named so that the builtin help() is not shadowed
    encoding = DEFAULT_ENCODING
    factor = None
    for opt, arg in opts:
        if opt == '-l':
            options['lowercase'] = True
        elif opt == '-h':
            show_help = True
        elif opt == '-e':
            encoding = arg
        elif opt == '-m':
            options['moses_escape'] = True
        elif opt == '-f':
            factor = int(arg)
        elif opt == '-t':
            options['num_t'] = int(arg)
    # Usage is shown on request (-h) or when more than two positional
    # arguments are given.
    if show_help or len(filenames) > 2:
        display_usage()
        sys.exit(1)
    # Process the input: plain tokenization unless a factor was requested.
    tok = Tokenizer(options)
    if factor is None:
        proc_func = tok.tokenize
    else:
        proc_func = lambda text: tok.tokenize_factored_text(text, factor)
    process_lines(proc_func, filenames, encoding)
예제 #2
0
    print >> sys.stderr, __doc__


if __name__ == '__main__':
    # Command-line entry point: parse the options, build a Tokenizer and run
    # it over the input through process_lines().
    #
    # Options (a trailing ':' in the getopt string means "takes an argument"):
    #   -h      show the usage text and exit
    #   -l      sets options['lowercase']
    #   -e ENC  encoding handed to process_lines() (default DEFAULT_ENCODING)
    #   -m      sets options['moses_escape']
    #   -f N    integer passed as `factor` to tok.tokenize_factored_text()
    try:
        opts, filenames = getopt.getopt(sys.argv[1:], 'hle:mf:')
    except getopt.GetoptError as err:
        # Unknown option or missing argument: report it and show the usage
        # text instead of terminating with a traceback.
        sys.stderr.write('%s\n' % err)
        display_usage()
        sys.exit(1)
    options = {}
    show_help = False  # named so that the builtin help() is not shadowed
    encoding = DEFAULT_ENCODING
    factor = None
    for opt, arg in opts:
        if opt == '-l':
            options['lowercase'] = True
        elif opt == '-h':
            show_help = True
        elif opt == '-e':
            encoding = arg
        elif opt == '-m':
            options['moses_escape'] = True
        elif opt == '-f':
            factor = int(arg)
    # Usage is shown on request (-h) or when more than two positional
    # arguments are given.
    if show_help or len(filenames) > 2:
        display_usage()
        sys.exit(1)
    # Process the input: plain tokenization unless a factor was requested.
    tok = Tokenizer(options)
    if factor is None:
        proc_func = tok.tokenize
    else:
        proc_func = lambda text: tok.tokenize_factored_text(text, factor)
    process_lines(proc_func, filenames, encoding)
예제 #3
0
def display_usage():
    """\
    Display program usage information.

    Writes the module docstring (``__doc__``) to standard error. Takes no
    arguments and returns nothing.
    """
    # Python 2 "print chevron" statement: sends the output to sys.stderr
    # instead of standard output.
    print >> sys.stderr, __doc__


if __name__ == '__main__':
    # Command-line entry point: parse the options, build a Detokenizer and
    # run it over the input through process_lines().
    #
    # Options (a trailing ':' in the getopt string means "takes an argument"):
    #   -e ENC   encoding handed to process_lines() (default DEFAULT_ENCODING)
    #   -l LANG  stored in options['language']
    #   -c       sets options['capitalize_sents']
    #   -h       show the usage text and exit
    try:
        opts, filenames = getopt.getopt(sys.argv[1:], 'e:hcl:')
    except getopt.GetoptError as err:
        # Unknown option or missing argument: report it and show the usage
        # text instead of terminating with a traceback.
        sys.stderr.write('%s\n' % err)
        display_usage()
        sys.exit(1)
    options = {}
    show_help = False  # named so that the builtin help() is not shadowed
    encoding = DEFAULT_ENCODING
    for opt, arg in opts:
        if opt == '-e':
            encoding = arg
        elif opt == '-l':
            options['language'] = arg
        elif opt == '-c':
            options['capitalize_sents'] = True
        elif opt == '-h':
            show_help = True
    # Usage is shown on request (-h) or when more than two positional
    # arguments are given.
    if show_help or len(filenames) > 2:
        display_usage()
        sys.exit(1)
    # Process the input.
    detok = Detokenizer(options)
    process_lines(detok.detokenize, filenames, encoding)
예제 #4
0
def display_usage():
    """\
    Display program usage information.

    Writes the module docstring (``__doc__``) to standard error. Takes no
    arguments and returns nothing.
    """
    # Python 2 "print chevron" statement: sends the output to sys.stderr
    # instead of standard output.
    print >> sys.stderr, __doc__


if __name__ == '__main__':
    # Command-line entry point: parse the options, build a Detokenizer and
    # run it over the input through process_lines().
    #
    # Options (a trailing ':' in the getopt string means "takes an argument"):
    #   -e ENC   encoding handed to process_lines() (default DEFAULT_ENCODING)
    #   -l LANG  stored in options['language']
    #   -c       sets options['capitalize_sents']
    #   -h       show the usage text and exit
    try:
        opts, filenames = getopt.getopt(sys.argv[1:], 'e:hcl:')
    except getopt.GetoptError as err:
        # Unknown option or missing argument: report it and show the usage
        # text instead of terminating with a traceback.
        sys.stderr.write('%s\n' % err)
        display_usage()
        sys.exit(1)
    options = {}
    show_help = False  # named so that the builtin help() is not shadowed
    encoding = DEFAULT_ENCODING
    for opt, arg in opts:
        if opt == '-e':
            encoding = arg
        elif opt == '-l':
            options['language'] = arg
        elif opt == '-c':
            options['capitalize_sents'] = True
        elif opt == '-h':
            show_help = True
    # Usage is shown on request (-h) or when more than two positional
    # arguments are given.
    if show_help or len(filenames) > 2:
        display_usage()
        sys.exit(1)
    # Process the input.
    detok = Detokenizer(options)
    process_lines(detok.detokenize, filenames, encoding)