def main():
    """
    Command-line entry point: train and save backoff model(s).

    Positional arguments are a model type, a model name and an input file.
    The input is loaded with command_line_input(). Without -p/--partitions
    a single model is trained on the whole input; with it, one model is
    trained for every subset returned by holdout_partition(), each named
    via the builder's partition_model_name().

    Exits with status 1 if fewer than three positional arguments are given
    and with status 0 after printing the training-option help
    ('--opts help').
    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a backoff builder model using the given "
        "input data. Specify a model type (ngram, etc) and a name to "
        "identify it. The data file should be a stored SequenceIndex file."
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft",
        dest="filetype",
        action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt",
        dest="file_options",
        action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        complaint = "You must specify a model type, a model name and an input data file as arguments"
        sys.stderr.write(complaint + "\n")
        sys.exit(1)
    # Only the first three positional arguments are used
    model_type, model_name, in_file = arguments[:3]
    filename = os.path.abspath(in_file)

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Work out the training options given on the command line
    raw_opts = options.training_opts
    if raw_opts is not None and raw_opts.lower() == "help":
        # The user only asked for the option listing: show it and stop
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    if raw_opts is None:
        training_opts = {}
    else:
        parsed_opts = ModuleOption.process_option_string(raw_opts)
        training_opts = ModuleOption.process_option_dict(
            parsed_opts, model_cls.TRAINING_OPTIONS)

    # Build the list of (model name, training data) jobs
    if options.partitions is None:
        jobs = [(model_name, input_data)]
    else:
        jobs = []
        subsets = holdout_partition(input_data, options.partitions)
        for index, subset in enumerate(subsets):
            jobs.append(
                (builder_cls.partition_model_name(model_name, index), subset))

    for job_name, job_data in jobs:
        # A fresh model instance per job, trained and stored under its name
        model = model_cls(job_name, options=training_opts)
        model.train(job_data)
        model.save()
        print("Trained model %s" % (job_name))
def main():
    """
    Command-line entry point for administrative operations on trained
    backoff models.

    Expects a model type followed by a command name and any command
    arguments. If fewer than two positional arguments are given, a usage
    hint (including the available model types) is written to stderr and
    the process exits with status 1.

    NOTE(review): only the 'list' and 'desc' command handlers are visible
    in this excerpt; the code that dispatches on `command` is presumably
    further on and is not shown here.
    """
    usage = "%prog <model-type> <command> [<command-arg> ...]"
    description = "Provides administrative operations for trained "\
        "backoff models. Use the 'help' command to get a list "\
        "of commands or command usage."
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model type and a command" + "\n")
        sys.stderr.write(
            "Available model types: %s" % ", ".join(BUILDERS.keys()) + "\n")
        sys.stderr.write(
            "Use the command 'help' for details of available commands" + "\n")
        sys.exit(1)
    model_type = arguments[0]
    command = arguments[1].lower()
    command_args = arguments[2:]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    def _load_model(name):
        # Load the model from its file
        return model_cls.load_model(name)

    # Define behaviour for each command
    list_help = "Lists all the trained models available"
    def _list(args):
        # List the available models for the given model type
        models = model_cls.list_models()
        print("Available models for %s:" % model_cls.MODEL_TYPE)
        # sorted() already returns a list, so no extra list() is needed
        print(", ".join(sorted(group_numerical_suffixes(models))))

    desc_help = "Outputs the descriptive text associated with the model at training time"
    def _desc(args):
        if not args:
            raise CommandError("desc requires a model name as an argument")
        name = args[0]
        try:
            model = _load_model(name)
        except ModelLoadError as err:
            # Try loading a model with 0 on the end - allows you to use
            #  just the base name for crossval models
            try:
                model = _load_model("%s0" % name)
            except ModelLoadError:
                # Neither name exists: report the failure for the name the
                #  user actually asked for
                raise err
            # Only announce the fallback once it is known to have loaded;
            #  previously this was printed before the attempt, so it could
            #  claim that <name>0 exists when it does not
            notice = "No model %s, but %s0 does exist\n" % (name, name)
            sys.stderr.write(notice + "\n")
        print("Model descriptor")
        print("================")
        print(model.description)
def main():
    """
    Train backoff model(s) from the command line.

    Usage: <model-type> <model_name> <in-file>, plus the options registered
    below. The input file is read through command_line_input(); the model
    class is taken from the builder returned by get_backoff_builder().
    When --partitions is supplied, holdout_partition() splits the input and
    one model is trained and saved per resulting subset; otherwise a single
    model is trained on all of the input.

    Calls sys.exit(1) when fewer than three positional arguments are given
    and sys.exit(0) after showing training-option help for '--opts help'.
    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "".join([
        "Trains a backoff builder model using the given ",
        "input data. Specify a model type (ngram, etc) and a name to ",
        "identify it. The data file should be a stored SequenceIndex file.",
    ])
    parser = OptionParser(usage=usage, description=description)

    # Each entry is (flags, keyword arguments); registered in this order so
    # that the generated help output keeps its option ordering
    option_table = [
        (('-p', '--partitions'), dict(
            dest="partitions",
            action="store",
            type="int",
            help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")),
        (('--opts',), dict(
            dest="training_opts",
            action="store",
            help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")),
        # File input options
        (("--filetype", "--ft"), dict(
            dest="filetype",
            action="store",
            help="select the file type for the input file. Same filetypes as jazzparser",
            default='bulk-db')),
        (("--file-options", "--fopt"), dict(
            dest="file_options",
            action="store",
            help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, arguments = parse_args_with_config(parser)

    if not len(arguments) >= 3:
        sys.stderr.write("%s\n" % (
            "You must specify a model type, a model name and an input data file as arguments",))
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is None:
        training_opts = {}
    else:
        if opt_string.lower() == "help":
            help_intro = "Training options for %s" % model_cls.__name__
            print(options_help_text(model_cls.TRAINING_OPTIONS,
                                    intro=help_intro))
            sys.exit(0)
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            model_cls.TRAINING_OPTIONS)

    def _train_and_save(name, data):
        # Instantiate a fresh model with this name, train it and store it
        trained = model_cls(name, options=training_opts)
        trained.train(data)
        trained.save()
        print("Trained model %s" % (name))

    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        # Name every partition model up front, before any training starts
        models = [
            (builder_cls.partition_model_name(model_name, num), seqs)
            for num, seqs in enumerate(parts)
        ]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        _train_and_save(part_name, seqs)
# Check that the options are valid try: tagger_cls.check_options(topts) except ModuleOptionError, err: logger.error("Problem with tagger options (--topt): %s" % err) return 1 ######## Backoff ######## # Load the requested backoff model, if any if options.backoff is not None: from jazzparser.backoff import BUILDERS if options.backoff.lower() == "help": print "Available backoff model types are: %s" % ", ".join(BUILDERS) return 0 try: backoff = get_backoff_builder(options.backoff) except BackoffLoadError: logger.error("The backoff model '%s' could not be loaded. Possible "\ "models are: %s" % (options.backoff, ", ".join(BUILDERS))) return 1 else: backoff = None # Get backoff options for initializing the backoff model if options.backoff_opts is not None: npoptstr = options.backoff_opts if "help" in [s.strip().lower() for s in npoptstr]: # Output this tagger's option help from jazzparser.utils.options import options_help_text print options_help_text(backoff.BUILDER_OPTIONS, intro="Available options for selected backoff module") return 0