def main(argv=None):
    """Entry point of the ``thainlp`` command line.

    Only the token right after the program name is parsed here; the full
    argument vector is then handed to the ``App`` class of the matching
    attribute of ``cli``, which does its own parsing.

    :param argv: full argument vector, program name included. Any falsy
        value (``None``, empty list) falls back to ``sys.argv``.
    """
    argv = argv or sys.argv

    usage_text = (
        "thainlp <command> [options]\n\n"
        "Example:\n\n"
        "thainlp data catalog\n\n"
        "--"
    )
    top_parser = argparse.ArgumentParser(
        prog="thainlp",
        description="Thai natural language processing.",
        usage=usage_text,
    )
    top_parser.add_argument(
        "command",
        type=str,
        choices=cli.COMMANDS,
        help="text processing action",
    )

    # Slot 0 is the program name; slot 1 is the command.
    parsed = top_parser.parse_args(argv[1:2])
    # NOTE(review): presumably prints help and exits when the value is
    # empty -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.command, top_parser)

    if not hasattr(cli, parsed.command):
        return

    handler = getattr(cli, parsed.command)
    handler.App(argv)
def main(args=None):
    """The main routine of PyThaiNLP command line.

    The first token selects an attribute of ``cli``; that attribute's
    ``App`` class receives the whole argument vector and parses the rest.

    :param args: command-line tokens *without* the program name. When
        ``None`` the tokens are taken from ``sys.argv[1:]``. Previously this
        parameter was accepted but ignored (``sys.argv`` was always used).
    """
    if args is None:
        args = sys.argv[1:]
    else:
        # Copy so slicing/concatenation below works for any sequence type.
        args = list(args)

    parser = argparse.ArgumentParser(
        usage="pythainlp command [subcommand] [options]"
    )
    parser.add_argument(
        "command",
        type=str,
        default="",
        nargs="?",
        help="[%s]" % "|".join(cli.COMMANDS),
    )

    # Only the command itself is parsed here; keep the namespace under its
    # own name rather than shadowing ``args``.
    parsed = parser.parse_args(args[:1])
    # NOTE(review): presumably prints help and exits when the command is
    # empty -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.command, parser)

    if hasattr(cli, parsed.command):
        command = getattr(cli, parsed.command)
        # The App classes index their argument vector like sys.argv
        # (program name in slot 0, command in slot 1), so rebuild that
        # layout around the caller-supplied tokens.
        prog = sys.argv[0] if sys.argv else "pythainlp"
        command.App([prog, *args])
    else:
        print(
            f"Command not available: {parsed.command}\nPlease run with --help for alternatives"
        )
def __init__(self, argv):
    """Parse the tokenize subcommand and start the matching tokenizer app.

    ``argv[2]`` is read as the subcommand; everything from ``argv[3]`` on is
    forwarded untouched to the selected app. Matching is by prefix on the
    lower-cased subcommand.

    :param argv: full argument vector (program name, command, subcommand,
        then the subcommand's own options).
    :raises NotImplementedError: when the subcommand matches no known
        token type.
    """
    parser = argparse.ArgumentParser(**cli.make_usage("tokenize"))
    parser.add_argument(
        "subcommand",
        type=str,
        nargs="?",
        help="[subword|syllable|word|sent]",
    )

    args = parser.parse_args(argv[2:3])
    # NOTE(review): presumably prints help and exits when no subcommand was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.subcommand, parser)
    subcommand = str.lower(args.subcommand)

    argv = argv[3:]

    if subcommand.startswith("word"):
        WordTokenizationApp("word", argv)
    elif subcommand.startswith("syl"):
        SyllableTokenizationApp("syllable", argv)
    elif subcommand.startswith("subw"):
        SubwordTokenizationApp("subword", argv)
    elif subcommand.startswith("sent"):
        # Was SubwordTokenizationApp: a copy-paste slip that sent sentence
        # requests to the subword tokenizer.
        SentenceTokenizationApp("sent", argv)
    else:
        raise NotImplementedError(
            f"Subcommand not available: {subcommand}")
def __init__(self, argv):
    """Parse ``corpus`` arguments and call the ``App`` attribute named by the subcommand.

    Everything from ``argv[2]`` on is parsed in one go. The lower-cased
    subcommand is looked up as an attribute of ``App`` and called with the
    parsed namespace.

    :param argv: full argument vector (program name, command, then the
        subcommand and its options).
    :raises NotImplementedError: when ``App`` has no attribute named after
        the subcommand.
    """
    corpus_parser = argparse.ArgumentParser(**cli.make_usage("corpus"))
    # TODO: there should be a "list" subcommand
    corpus_parser.add_argument(
        "subcommand",
        type=str,
        default="",
        nargs="?",
        help="[download|remove]",
    )
    corpus_parser.add_argument("--name", type=str, help="corpus's name")

    parsed = corpus_parser.parse_args(argv[2:])
    # NOTE(review): presumably prints help and exits on an empty
    # subcommand -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.subcommand, corpus_parser)

    action = str.lower(parsed.subcommand)
    if not hasattr(App, action):
        raise NotImplementedError(
            f"Subcommand not available: {action}")

    handler = getattr(App, action)
    handler(parsed)
def __init__(self, name, argv):
    """Parse tokenizer options, run the tokenizer and print the joined tokens.

    Defaults for the options come from ``self.separator``, ``self.engine``
    and ``self.keep_whitespace``. The parsed namespace is stored on
    ``self.args`` before ``self.run`` is called.

    :param name: token-type label, used only to build the usage text.
    :param argv: the option tokens for this tokenizer (already stripped of
        program name, command and subcommand by the caller).
    """

    def parse_flag(value):
        """Convert a command-line word to a bool.

        ``type=bool`` is not usable with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("0", "false", "f", "no", "n", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {value!r}"
        )

    parser = argparse.ArgumentParser(**cli.make_usage("tokenize " + name))
    parser.add_argument(
        "-t",
        "--text",
        type=str,
        help="input text",
    )
    # The separator is text, not a flag; it used to be declared type=bool,
    # which turned any supplied separator into True.
    parser.add_argument(
        "-s",
        "--sep",
        type=str,
        help=f"default: {self.separator}",
        default=self.separator,
    )
    parser.add_argument(
        "-e",
        "--engine",
        type=str,
        help=f"default: {self.engine}",
        default=self.engine,
    )
    parser.add_argument(
        "-w",
        "--keep-whitespace",
        type=parse_flag,
        help=f"default: {self.keep_whitespace}",
        default=self.keep_whitespace,
    )

    args = parser.parse_args(argv)
    self.args = args

    # NOTE(review): presumably prints help and exits when no text was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.text, parser)
    result = self.run(args.text, engine=args.engine)
    # Join with the separator the user asked for; without -s this is still
    # self.separator, so default output is unchanged.
    print(args.sep.join(result))
def __init__(self, name, argv):
    """Parse ``--text``/``--engine``, run the tokenizer and print the result.

    The engine defaults to ``self.default_engine``. The parsed namespace is
    kept on ``self.args``; the tokens returned by ``self.run`` are printed
    joined with ``self.separator``.

    :param name: token-type label, used only to build the usage text.
    :param argv: option tokens for this tokenizer.
    """
    usage_kwargs = cli.make_usage("tokenization " + name)
    opt_parser = argparse.ArgumentParser(**usage_kwargs)
    opt_parser.add_argument("--text", type=str, help="input text")
    opt_parser.add_argument(
        "--engine",
        type=str,
        help="default: %s" % self.default_engine,
        default=self.default_engine,
    )

    self.args = args = opt_parser.parse_args(argv)

    # NOTE(review): presumably prints help and exits when no text was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.text, opt_parser)

    print(f"Using engine={args.engine}")
    tokens = self.run(args.text, engine=args.engine)
    print(self.separator.join(tokens))
def __init__(self, name, argv):
    """Parse tokenizer options, tokenize the text and print the tokens.

    Options: a positional ``text``; ``-s/--sep`` (default
    ``self.separator``); ``-a/--algo`` (default ``self.algorithm``); and the
    pair ``-w/--keep-whitespace`` / ``-nw/--no-whitespace``, which write
    ``True`` / ``False`` to the same ``keep_whitespace`` destination
    (default ``True``). Each token is followed by the separator, including
    the last one.

    :param name: token-type label, used only to build the usage text.
    :param argv: option tokens for this tokenizer.
    """
    opt_parser = argparse.ArgumentParser(
        **cli.make_usage("tokenize " + name)
    )
    opt_parser.add_argument("text", type=str, nargs="?", help="input text")
    opt_parser.add_argument(
        "-s", "--sep",
        dest="separator",
        type=str,
        help=f"default: {self.separator}",
        default=self.separator,
    )
    opt_parser.add_argument(
        "-a", "--algo",
        dest="algorithm",
        type=str,
        help=f"default: {self.algorithm}",
        default=self.algorithm,
    )
    # Two opposite switches sharing one destination.
    opt_parser.add_argument(
        "-w", "--keep-whitespace",
        dest="keep_whitespace",
        action="store_true",
    )
    opt_parser.add_argument(
        "-nw", "--no-whitespace",
        dest="keep_whitespace",
        action="store_false",
    )
    opt_parser.set_defaults(keep_whitespace=True)

    self.args = args = opt_parser.parse_args(argv)

    # NOTE(review): presumably prints help and exits when no text was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.text, opt_parser)

    tokens = self.run(
        args.text,
        engine=args.algorithm,
        keep_whitespace=args.keep_whitespace,
    )
    sep = args.separator
    joined = sep.join(tokens)
    print(joined + sep)
def __init__(self, argv):
    """Parse the ``tag`` subcommand and start the matching tagging app.

    ``argv[2]`` is read as the subcommand (compared lower-cased); the tokens
    from ``argv[3]`` on are forwarded to the selected app.

    :param argv: full argument vector (program name, command, subcommand,
        then the subcommand's own options).
    :raises NotImplementedError: for any subcommand other than ``pos``.
    """
    tag_parser = argparse.ArgumentParser(**cli.make_usage("tag"))
    tag_parser.add_argument(
        "subcommand", type=str, nargs="?", help="[pos]"
    )

    parsed = tag_parser.parse_args(argv[2:3])
    # NOTE(review): presumably prints help and exits when no subcommand was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.subcommand, tag_parser)

    chosen = str.lower(parsed.subcommand)
    remaining = argv[3:]

    if chosen != "pos":
        raise NotImplementedError(
            f"Subcommand not available: {chosen}")

    POSTaggingApp("Part-of-Speech tagging", remaining)
def __init__(self, argv):
    """Parse the token type and start the matching tokenizer app.

    ``argv[2]`` is read as the (required) token type and matched by prefix
    after lower-casing: ``w`` -> word, ``sy`` -> syllable, ``su`` -> subword,
    ``se`` -> sentence. The tokens from ``argv[3]`` on are forwarded to the
    selected app. An unrecognised token type only prints a message.

    :param argv: full argument vector (program name, command, token type,
        then the tokenizer's own options).
    """
    usage_text = (
        'thainlp tokenize <token_type> [options] "<text>"\n\n'
        "token_type:\n\n"
        "subword subword (may not be a linguistic unit)\n"
        "syllable syllable\n"
        "word word\n"
        "sent sentence\n\n"
        "options:\n\n"
        "--sep or -s <separator> specify custom separator\n"
        " (default is a space)\n"
        "--algo or -a <algorithm> tokenization algorithm\n"
        " (see API doc for more info)\n"
        "--keep-whitespace or -w keep whitespaces in output\n"
        " (default)\n\n"
        "<separator> and <text> should be inside double quotes.\n\n"
        "Example:\n\n"
        'thainlp tokenize word -s "|" "ใต้แสงนีออนเปลี่ยวเหงา"\n\n'
        "--"
    )
    type_parser = argparse.ArgumentParser(
        prog="tokenize",
        description="Break a text into small units (tokens).",
        usage=usage_text,
    )
    type_parser.add_argument(
        "token_type",
        type=str,
        help="[subword|syllable|word|sent]",
    )

    parsed = type_parser.parse_args(argv[2:3])
    # NOTE(review): presumably prints help and exits on an empty value --
    # confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.token_type, type_parser)

    kind = str.lower(parsed.token_type)
    rest = argv[3:]

    # Branch order matters: "w" is tested first, then the two-letter
    # prefixes that tell syllable / subword / sentence apart.
    if kind.startswith("w"):
        WordTokenizationApp("word", rest)
        return
    if kind.startswith("sy"):
        SyllableTokenizationApp("syllable", rest)
        return
    if kind.startswith("su"):
        SubwordTokenizationApp("subword", rest)
        return
    if kind.startswith("se"):
        SentenceTokenizationApp("sent", rest)
        return

    print(f"Token type not available: {kind}")
def __init__(self, argv):
    """Parse ``--text``/``--engine`` and print the soundex code of the text.

    Everything from ``argv[2]`` on is parsed. The engine defaults to
    ``udom83``.

    :param argv: full argument vector (program name, command, then options).
    """
    sx_parser = argparse.ArgumentParser("soundex")
    sx_parser.add_argument("--text", type=str, help="text")
    sx_parser.add_argument(
        "--engine",
        type=str,
        help="[udom83|lk82|metasound] (default: udom83)",
        default="udom83",
    )

    parsed = sx_parser.parse_args(argv[2:])
    # NOTE(review): presumably prints help and exits when no text was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.text, sx_parser)

    print(soundex(parsed.text, engine=parsed.engine))
def __init__(self, argv):
    """Parse the benchmark task and start the matching benchmark.

    ``argv[2]`` is read as the (required) task name and compared
    lower-cased; the tokens from ``argv[3]`` on are forwarded to the
    selected benchmark. An unknown task is reported on stdout instead of
    being silently ignored.

    :param argv: full argument vector (program name, command, task, then
        the task's own options).
    """
    parser = argparse.ArgumentParser(
        prog="benchmark",
        description=("Benchmark for various tasks;\n"
                     "currently, we have only for word tokenization."),
        usage=("thainlp benchmark [task] [task-options]\n\n"
               "tasks:\n\n"
               "word-tokenization benchmark word tokenization\n\n"
               "--"),
    )
    parser.add_argument("task", type=str, help="[word-tokenization]")

    args = parser.parse_args(argv[2:3])
    # NOTE(review): presumably prints help and exits on an empty value --
    # confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.task, parser)
    task = str.lower(args.task)

    task_argv = argv[3:]
    if task == "word-tokenization":
        WordTokenizationBenchmark(task, task_argv)
    else:
        # Previously an unrecognised task fell through with no output at
        # all, which looked like a successful run.
        print(f"Task not available: {task}")
def __init__(self, argv):
    """Parse the tagging command and start the matching tagging app.

    ``argv[2]`` is read as the command; the tokens from ``argv[3]`` on are
    forwarded to the selected app.

    :param argv: full argument vector (program name, command group, command,
        then the command's own options).
    :raises ValueError: for any command other than ``pos``.
    """
    parser = argparse.ArgumentParser(**cli.make_usage("tagging"))
    parser.add_argument(
        "command",
        type=str,
        nargs="?",
        help="[pos]"
    )

    args = parser.parse_args(argv[2:3])
    command = args.command

    # NOTE(review): presumably prints help and exits when no command was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.command, parser)

    argv = argv[3:]

    if command == "pos":
        POSTaggingApp("Part-of-Speech tagging", argv)
    else:
        # The message used the undefined name ``subcommand``, so this path
        # raised NameError instead of the intended ValueError.
        raise ValueError(f"no command:{command}")
def __init__(self, argv):
    """Parse the tokenization command and start the matching tokenizer app.

    ``argv[2]`` is read as the command (``word`` or ``syllable``, exact
    match); the tokens from ``argv[3]`` on are forwarded to the selected
    app. An unknown command is reported on stdout instead of being silently
    ignored.

    :param argv: full argument vector (program name, command group, command,
        then the command's own options).
    """
    parser = argparse.ArgumentParser(**cli.make_usage("tokenization"))
    parser.add_argument(
        "command",
        type=str,
        nargs="?",
        help="[word|syllable]"
    )

    args = parser.parse_args(argv[2:3])
    command = args.command

    # NOTE(review): presumably prints help and exits when no command was
    # given -- confirm against cli.exit_if_empty.
    cli.exit_if_empty(command, parser)

    argv = argv[3:]

    if command == "word":
        WordTokenizationApp("word", argv)
    elif command == "syllable":
        SyllableTokenizationApp("syllable", argv)
    else:
        # Previously an unrecognised command fell through with no output,
        # which looked like a successful run.
        print(f"Command not available: {command}")
def __init__(self, argv):
    """Parse the soundex engine subcommand and text, then print the code.

    The subcommand selects the engine by prefix on its lower-cased form:
    ``udom`` -> ``udom83``, ``lk`` -> ``lk82``, ``meta`` -> ``metasound``.

    :param argv: full argument vector (program name, command, subcommand,
        then ``-t/--text``).
    :raises NotImplementedError: when the subcommand matches no engine.
    """
    parser = argparse.ArgumentParser("soundex")
    parser.add_argument(
        "subcommand",
        type=str,
        nargs="?",
        help="[udom83|lk82|metasound]"
    )
    parser.add_argument(
        "-t",
        "--text",
        type=str,
        help="input text",
    )

    # Parse everything after the command. The former ``argv[2:3]`` slice
    # exposed a single token, so ``--text`` could never be supplied together
    # with the subcommand and ``args.text`` was always None.
    args = parser.parse_args(argv[2:])

    # NOTE(review): presumably prints help and exits on an empty value --
    # confirm against cli.exit_if_empty.
    cli.exit_if_empty(args.subcommand, parser)
    subcommand = str.lower(args.subcommand)

    cli.exit_if_empty(args.text, parser)

    engine_by_prefix = (
        ("udom", "udom83"),
        ("lk", "lk82"),
        ("meta", "metasound"),
    )
    for prefix, engine in engine_by_prefix:
        if subcommand.startswith(prefix):
            break
    else:
        raise NotImplementedError(
            f"Subcommand not available: {subcommand}")

    sdx = soundex(args.text, engine=engine)
    print(sdx)
def __init__(self, argv):
    """Parse ``corpus`` arguments and call the ``App`` attribute named by the command.

    Everything from ``argv[2]`` on is parsed in one go. The command is
    looked up (as typed, no case folding) as an attribute of ``App`` and
    called with the parsed namespace; an unknown command only prints a
    message.

    :param argv: full argument vector (program name, command group, then
        the command and its options).
    """
    corpus_parser = argparse.ArgumentParser(**cli.make_usage("corpus"))
    corpus_parser.add_argument("--name", type=str, help="corpus's name")
    corpus_parser.add_argument(
        "command",
        type=str,
        default="",
        nargs="?",
        help="[download|remove]",
    )

    parsed = corpus_parser.parse_args(argv[2:])
    # NOTE(review): presumably prints help and exits on an empty command --
    # confirm against cli.exit_if_empty.
    cli.exit_if_empty(parsed.command, corpus_parser)

    action = parsed.command
    if not hasattr(App, action):
        print("No command available: %s" % action)
        return

    handler = getattr(App, action)
    handler(parsed)