def parse_cli_args(argv, default_encoding):
    """\
    Parse the tokenizer's command-line arguments.

    Switches: -l sets options['lowercase'], -m sets options['moses_escape'],
    -t NUM sets options['num_t'], -e ENCODING overrides the encoding,
    -f FACTOR selects factored tokenization, -h requests the usage text.

    @param argv: the argument list without the program name
    @param default_encoding: encoding returned when -e is not given
    @return: a tuple (options, encoding, factor, show_help, filenames)
    @raise getopt.GetoptError: unknown switch or missing switch argument
    @raise ValueError: the -f or -t argument is not an integer
    """
    # NB: '-f' needs its own colon; without it getopt hands over an empty
    # argument and the factor number ends up among the file names.
    opts, filenames = getopt.getopt(argv, 'hle:mf:t:')
    options = {}
    show_help = False
    encoding = default_encoding
    factor = None
    for opt, arg in opts:
        if opt == '-l':
            options['lowercase'] = True
        elif opt == '-h':
            show_help = True
        elif opt == '-e':
            encoding = arg
        elif opt == '-m':
            options['moses_escape'] = True
        elif opt == '-f':
            factor = int(arg)
        elif opt == '-t':
            options['num_t'] = int(arg)
    return options, encoding, factor, show_help, filenames


if __name__ == '__main__':
    # parse options; a malformed command line gets the usage text
    # instead of a traceback
    try:
        options, encoding, factor, show_help, filenames = parse_cli_args(
            sys.argv[1:], DEFAULT_ENCODING)
    except (getopt.GetoptError, ValueError) as err:
        sys.stderr.write('%s\n' % (err,))
        display_usage()
        sys.exit(1)
    # display help
    if len(filenames) > 2 or show_help:
        display_usage()
        sys.exit(1)
    # process the input
    tok = Tokenizer(options)
    proc_func = tok.tokenize if factor is None else \
        lambda text: tok.tokenize_factored_text(text, factor)
    process_lines(proc_func, filenames, encoding)
# NOTE(review): the statement below is the tail of a definition whose header
# lies outside this chunk; it is kept verbatim -- restore its indentation
# under that header when merging.
print >> sys.stderr, __doc__


def parse_cli_args(argv, default_encoding):
    """\
    Parse the tokenizer's command-line arguments.

    Switches: -l sets options['lowercase'], -m sets options['moses_escape'],
    -e ENCODING overrides the encoding, -f FACTOR selects factored
    tokenization, -h requests the usage text.

    @param argv: the argument list without the program name
    @param default_encoding: encoding returned when -e is not given
    @return: a tuple (options, encoding, factor, show_help, filenames)
    @raise getopt.GetoptError: unknown switch or missing switch argument
    @raise ValueError: the -f argument is not an integer
    """
    opts, filenames = getopt.getopt(argv, 'hle:mf:')
    options = {}
    show_help = False
    encoding = default_encoding
    factor = None
    for opt, arg in opts:
        if opt == '-l':
            options['lowercase'] = True
        elif opt == '-h':
            show_help = True
        elif opt == '-e':
            encoding = arg
        elif opt == '-m':
            options['moses_escape'] = True
        elif opt == '-f':
            factor = int(arg)
    return options, encoding, factor, show_help, filenames


if __name__ == '__main__':
    # parse options; a malformed command line gets the usage text
    # instead of a traceback
    try:
        options, encoding, factor, show_help, filenames = parse_cli_args(
            sys.argv[1:], DEFAULT_ENCODING)
    except (getopt.GetoptError, ValueError) as err:
        sys.stderr.write('%s\n' % (err,))
        display_usage()
        sys.exit(1)
    # display help
    if len(filenames) > 2 or show_help:
        display_usage()
        sys.exit(1)
    # process the input
    tok = Tokenizer(options)
    proc_func = tok.tokenize if factor is None else \
        lambda text: tok.tokenize_factored_text(text, factor)
    process_lines(proc_func, filenames, encoding)
def display_usage():
    """\
    Display program usage information.

    Writes the module docstring, followed by a newline, to standard error.
    """
    # write() produces the same output as the Python-2-only
    # ``print >> sys.stderr`` statement but also works on Python 3.
    sys.stderr.write('%s\n' % (__doc__,))


def parse_cli_args(argv, default_encoding):
    """\
    Parse the detokenizer's command-line arguments.

    Switches: -e ENCODING overrides the encoding, -l LANG sets
    options['language'], -c sets options['capitalize_sents'],
    -h requests the usage text.

    @param argv: the argument list without the program name
    @param default_encoding: encoding returned when -e is not given
    @return: a tuple (options, encoding, show_help, filenames)
    @raise getopt.GetoptError: unknown switch or missing switch argument
    """
    opts, filenames = getopt.getopt(argv, 'e:hcl:')
    options = {}
    show_help = False
    encoding = default_encoding
    for opt, arg in opts:
        if opt == '-e':
            encoding = arg
        elif opt == '-l':
            options['language'] = arg
        elif opt == '-c':
            options['capitalize_sents'] = True
        elif opt == '-h':
            show_help = True
    return options, encoding, show_help, filenames


if __name__ == '__main__':
    # parse options; a malformed command line gets the usage text
    # instead of a traceback
    try:
        options, encoding, show_help, filenames = parse_cli_args(
            sys.argv[1:], DEFAULT_ENCODING)
    except getopt.GetoptError as err:
        sys.stderr.write('%s\n' % (err,))
        display_usage()
        sys.exit(1)
    # display help
    if len(filenames) > 2 or show_help:
        display_usage()
        sys.exit(1)
    # process the input
    detok = Detokenizer(options)
    process_lines(detok.detokenize, filenames, encoding)