def main(): usage = "%prog [options] <chord-corpus-file> <chord-labeling-model> <midi-file>" description = "Like findsong, but searches by chord label sequence "\ "similarity. The input is not a results file, but a midi file, or "\ "a midi bulk input (CSV)." parser = OptionParser(usage=usage) parser.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)") parser.add_option("-r", "--print-results", dest="print_results", action="store", default=5, type="int", help="number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus") parser.add_option("--filetype", "--ft", dest="filetype", action="store", default="bulk-segmidi", help="filetype to read in. Use 'segmidi' to read a single midi file, or 'bulk-segmidi' (default) to read many from a CSV") parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="store", help="options for the labeler. 
Type '--lopt help' for a list of available options.") parser.add_option("-g", "--gold-only", dest="gold_only", action="store_true", help="skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)") parser.add_option("--align", "--print-alignment", dest="print_alignment", action="store_true", help="print out the full alignment between the labeling and the top match") options, arguments = parser.parse_args() # Process parser options if options.popts is not None: poptstr = options.popts if "help" in [s.strip().lower() for s in poptstr]: # Output this parser's option help print options_help_text(DirectedCkyParser.PARSER_OPTIONS, intro="Available options for gold standard interpreter") sys.exit(0) poptstr = ":".join(poptstr) else: poptstr = "" popts = ModuleOption.process_option_string(poptstr) # Check that the options are valid try: DirectedCkyParser.check_options(popts) except ModuleOptionError, err: logger.error("Problem with parser options (--popt): %s" % err) sys.exit(1)
def main():
    """
    Command-line entry point: search a song set by semantic distance,
    reading parse results files and comparing them against the corpus
    with a user-selected semantics distance metric.
    """
    usage = "%prog [options] <song-set> <results-file0> [<results-file1> ...]"
    parser = OptionParser(usage=usage)
    parser.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)")
    parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    parser.add_option("-r", "--print-results", dest="print_results", action="store", default=5, type="int", help="number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus")
    parser.add_option("-g", "--gold-only", dest="gold_only", action="store_true", help="skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)")
    parser.add_option("--mc", "--metric-computation", dest="metric_computation", action="store_true", help="output the computation information for the metric between the parse result and each top search result")
    options, arguments = parser.parse_args()
    
    # For now, we always use the music_halfspan formalism with this script
    # If we wanted to make it generic, we'd just load the formalism according
    # to a command-line option
    formalism = Formalism
    
    # Process parser options (--popt may be given multiple times: append action)
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this parser's option help
            print options_help_text(DirectedCkyParser.PARSER_OPTIONS,
                    intro="Available options for gold standard interpreter")
            sys.exit(0)
        # Multiple --popt values are combined into one colon-separated string
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid before going any further
    # NOTE(review): 'logger' is assumed to be a module-level logger -- not
    # visible in this chunk
    try:
        DirectedCkyParser.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        sys.exit(1)
def main():
    """
    Command-line entry point: train a backoff builder model (e.g. ngram)
    on a stored SequenceIndex data file, optionally training one model
    per holdout partition of the data.
    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    options, arguments = parse_args_with_config(parser)
    
    if len(arguments) < 3:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]
    
    # The builder class determines which model class we train
    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS
    
    # Load the sequence data from a dbinput file
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['bulk-db', 'bulk-db-annotated'])
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                intro="Training options for %s" % model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts),
                            model_cls.TRAINING_OPTIONS)
    
    if options.partitions is not None:
        # Train one model per partition, each on the complement of its partition
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name,input_data)]
    
    for part_name,seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
def cl_output_options(cls, string): """ Convenience method so you don't have to do this lots of times over. Take a string of output options from the command line and set the output options from it. Should only be used in command-line scripts. """ if string is not None and string.lower() == "help": print "Available output options" print "========================" print options_help_text(cls.output_options) sys.exit(0) optdict = ModuleOption.process_option_string(string) cls.process_output_options(optdict)
def main():
    """
    Command-line entry point: train a Raphael & Stoddard chord labelling
    HMM on a list of midi files, with optional sequence truncation,
    splitting, holdout partitioning and multiprocess training.
    """
    usage = "%prog [options] <model_name> <input-file>"
    description = "Trains a model for the RaphSto chord labelling "\
        "algorithm on a file that contains a list of midi files with "\
        "training options"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    parser.add_option('--proc', '--processes', dest="processes", action="store", type="int", help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)", default=1)
    parser.add_option('--max-length', dest="max_length", action="store", type="int", help="limits the length of the training midi sequences in chunks")
    parser.add_option('--split-length', dest="split_length", action="store", type="int", help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot")
    parser.add_option('--min-length', dest="min_length", action="store", type="int", help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence")
    parser.add_option('--progress-out', dest="progress_out", action="store", help="output logging info to a file instead of the command line")
    parser.add_option('--init-model', dest="init_model", action="store", help="initialize the model using parameters from an already trained model")
    parser.add_option('--init-ctrans', dest="init_ctrans", action="store", help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability")
    parser.add_option('--chord-set', dest="chord_set", action="store", help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used")
    parser.add_option('-m', '--model-type', dest="model_type", action="store", help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()), default="standard")
    options, arguments = parse_args_with_config(parser)
    
    # Handle a training-options help request before anything else
    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS,
                intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)
    
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    print >>sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(' ')
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >>sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >>sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")
    
    if options.model_type not in MODEL_TYPES:
        print >>sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]
    
    # Chord set selection: 'help' lists available sets, otherwise validate
    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >>sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)
    
    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i,mh in enumerate(handlers):
        logger.info("%s: %s" % (i,midis.inputs[i][0]))
        # NOTE(review): get_emission_stream() appears to return a tuple whose
        # first element is the emission sequence -- confirm against the handler
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            # --max-length: hard truncation, discarding the tail
            logger.info("Truncating file %d to %d chunks (was %d)" % \
                    (i,options.max_length,len(emissions)))
            emissions = emissions[:options.max_length]
        if options.split_length is not None:
            # --split-length: keep the tail, but as separate new sequences
            logger.info("Splitting sequence %d into sequence no longer "\
                    "than %d chunks" % (i,options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[:options.split_length])
                emissions = emissions[options.split_length:]
        training_data.append(emissions)
    
    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data \
                            if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info("Threw away %d short sequences (below %d chunks)" % \
                    ((before_chuck-len(training_data)), options.min_length))
    
    logger.info("Training on %d sequences. Lengths: %s" % \
            (len(training_data), ", ".join(str(len(seq)) for seq in training_data)))
    
    if options.partitions is not None:
        # Train a model on the complement of each partition
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name,num),data) for num,data in enumerate(parts)]
    else:
        models = [(model_name,training_data)]
    
    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes
    
    for part_name,data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % \
                options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, \
                model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: "\
                "%s, %s, %s" % ctype_params)
            init_kwargs = { 'model_name' : part_name }
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs['chord_set'] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)
        # Initialize the chord transition probabilities if given
        if options.init_ctrans is not None:
            logger.info("Initializing chord transition distribution to %s" \
                % options.init_ctrans)
            model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes,
                save_intermediate=True)
    print >>sys.stderr, "Training terminating at %s" % datetime.now().isoformat(' ')
print "Available taggers are: %s" % ", ".join(TAGGERS) return 0 try: tagger_cls = get_tagger(options.supertagger) except TaggerLoadError: logger.error("The tagger '%s' could not be loaded. Possible "\ "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS))) return 1 # Get supertagger options before initializing the tagger if options.topts is not None: toptstr = options.topts if "help" in [s.strip().lower() for s in toptstr]: # Output this tagger's option help from jazzparser.utils.options import options_help_text print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger") return 0 toptstr = ":".join(toptstr) else: toptstr = "" topts = ModuleOption.process_option_string(toptstr) # Check that the options are valid try: tagger_cls.check_options(topts) except ModuleOptionError, err: logger.error("Problem with tagger options (--topt): %s" % err) return 1 ######## Backoff ######## # Load the requested backoff model, if any if options.backoff is not None:
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Loads a chord labeling model and uses it to assign chord "\ "labels to the given MIDI file." parser = OptionParser(usage=usage, description=description) # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='segmidi') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") # Labeling options parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="append", help="options for the labeler. Type '--lopt help' for a list of available options.") parser.add_option("--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)") # Output options parser.add_option("--single", "-1", dest="single", action="store_true", help="show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)") parser.add_option('-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input") parser.add_option('--chords-only', dest="chords_only", action="store_true", help="only realize the chords: don't overlay on the input midi (only works with -r)") options, arguments = parse_args_with_config(parser) if options.labeler_options is not None and "help" in options.labeler_options: print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler") sys.exit(0) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input "\ "(MIDI) data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Process the labeler options lopt_dict = 
ModuleOption.process_option_string(options.labeler_options) if options.single: # No point in getting more than one label, since we only display one lopt_dict['n'] = 1 if options.no_key: # Just set the nokey option lopt_dict['nokey'] = True # Check they're valid before doing anything else HPChordLabeler.process_labeling_options(lopt_dict) input_data = command_line_input(filename, filetype=options.filetype, options=options.file_options, allowed_types=['segmidi','bulk-segmidi']) bulk = not is_bulk_type(type(input_data)) if bulk: input_data = [input_data] for i,data in enumerate(input_data): input_stream = data.stream print "Read midi data in %d segments" % len(data) # Load the model model = HPChordLabeler.load_model(model_name) # Perform labeling labels = model.label(data, options=lopt_dict) # Try labeling as it will be passed to the tagger labs = model.label_lattice(data, options=lopt_dict) if options.single: # Special output for single label output print ", ".join(["%s" % timelabs[0][0] for timelabs in labels]) else: # Print out the labels for each timestep for time,timelabs in enumerate(labels): print "%d: %s" % (time, ", ".join(["%s (%.2e)" % (label,prob) for (label,prob) in timelabs])) if options.realize is not None: # Get the single best chord label for each time best_labels = [timelabs[0][0] for timelabs in labels] # Realize as a midi file print "Realizing output chord sequence" real = ChordSequenceRealizer(best_labels, model.chord_vocab, resolution=input_stream.resolution, chord_length=data.time_unit, text_events=True) if options.chords_only: # Don't overlay stream = real.generate(offset=data.tick_offset) else: stream = real.generate(overlay=input_stream, offset=data.tick_offset) if bulk: filename = "%s-%d" % (options.realize, i) else: filename = options.realize write_midifile(stream, filename)
def main():
    """
    Command-line entry point: train PCFG models on chord sequence data,
    optionally training one model per holdout partition of the input.
    """
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options")
    parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr")
    parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)
    
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level = log_level,
                  name = "training",
                  stderr = True)
    
    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
    
    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS,
                intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
                    ModuleOption.process_option_string(options.training_opts),
                    PcfgModel.TRAINING_OPTIONS)
    
    if len(arguments) == 0:
        # Without a model name we can't do anything; list what's available
        print >>sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >>sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name
    
    if options.partitions is not None:
        # One (partition-number, model-name) pair per partition
        parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]
    
    if len(arguments) < 2:
        print >>sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])
    
    if options.partitions is not None:
        # Prepare each training partition: each dataset is the complement
        # of one partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]
    
    for dataset,(parti,part_model) in zip(datasets,parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model, dataset, opts, grammar=grammar,
                                logger=logger)
        model.save()
        print "Trained model", part_model
def main():
    """
    Command-line entry point: train an HP chord labeling model on bulk
    input data, optionally partitioning the chord data (but not the
    MIDI data) for holdout evaluation.
    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int", help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts', dest="training_opts", action="append", help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store", help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store", help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store", help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                intro="Training options:")
        sys.exit(0)
    else:
        # NOTE(review): training_opts is a list here (append action) and is
        # passed straight to process_option_string, where sibling scripts
        # first join with ":" -- presumably process_option_string accepts a
        # list too; confirm against its implementation
        training_opts = ModuleOption.process_option_string(
                            options.training_opts)
    
    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                            single=False, bulk=True))
    
    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)
    
    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name, None)]
    
    for part_name, chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data, part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
def command_line_metric(formalism, metric_name=None, options=""): """ Utility function to make it easy to load a metric, with user-specified options, from the command line. Takes care of printing help output. Typical options:: parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics") parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options") You could then call this as:: metric = command_line_metric(formalism, options.metric, options.mopts) @return: the metric instantiated with given options """ import sys from jazzparser.utils.options import ModuleOption, options_help_text # Get a distance metric # Just check this, as it'll cause problems if len(formalism.semantics_distance_metrics) == 0: print "ERROR: the formalism defines no distance metrics, so this "\ "script won't work" sys.exit(1) # First get the metric if metric_name == "help": # Print out a list of metrics available print "Available distance metrics:" print ", ".join([metric.name for metric in \ formalism.semantics_distance_metrics]) sys.exit(0) if metric_name is None: # Use the first in the list as default metric_cls = formalism.semantics_distance_metrics[0] else: # Look for the named metric for m in formalism.semantics_distance_metrics: if m.name == metric_name: metric_cls = m break else: # No metric found matching this name print "No metric '%s'" % metric_name sys.exit(1) # Options might be given as a list, if the option action was "append" if isinstance(options, str): options = [options] # Now process the metric options if options is not None: moptstr = options if "help" in [s.strip().lower() for s in options]: # Output this parser's option help print options_help_text(metric_cls.OPTIONS, intro="Available options for metric '%s'" % metric_cls.name) sys.exit(0) moptstr = 
":".join(moptstr) else: moptstr = "" mopts = ModuleOption.process_option_string(moptstr) # Instantiate the metric with these options metric = metric_cls(options=mopts) return metric
def command_line_input(filename=None, filetype=None, options="", allowed_types=None, default_type=None): """ Utility function for processing file input options from the command line. Pass in as args the values straight from the command line options to select a filename, filetype and list of options. Typical command-line options for this purpose (for an optparse option parser C{op}):: op.add_option("--file", "-f", dest="file", action="store", help="use a file to get input from") op.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Use '--filetype help' for a list of available types") op.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Use '--fopt help', with '--ft <type>', for a list of available options") Then you can call this function as:: command_line_input(filename=options.file, filetype=options.filetype, options=options.file_options) @type allowed_types: list of strs @param allowed_types: types of input you want the user to be able to give. If not given, all types are allowed @type default_type: str @param default_type: filetype to assume if no other filetype is given @rtype: L{InputReader} subclass @return: the input wrapper of appropriate type, or None if no input file was given """ if allowed_types is None: allowed_types = get_input_type_names() if filetype is None and default_type is not None: filetype = default_type # Catch a request for filetype help if filetype is not None and filetype.lower() == "help": # Output possible file types print "Allowed input types: %s" % ", ".join(allowed_types) sys.exit(0) # Check that the filetype is valid and get the input type class if it is input_type = get_input_type(filetype) type_name = input_type_name(input_type) if input_type is None: raise InputTypeError, "Unknown filetype '%s'. 
Allowed types are: %s" % (filetype, ", ".join(allowed_types)) if type_name not in allowed_types: raise InputTypeError, "Cannot accept input of type '%s'. Allowed " "types are: %s" % ( filetype, ", ".join(allowed_types), ) if options is not None and options.lower() == "help": # Output help text from jazzparser.utils.options import options_help_text print options_help_text(input_type.FILE_INPUT_OPTIONS, intro="Available options for input type %s" % type_name) sys.exit(0) if filename is None: return None # First get a dict of the options file_options = ModuleOption.process_option_string(options) # Process the options as appropriate for this type file_options = input_type.process_option_dict(file_options) # Instantiate the input from the file as appropriate for the input type input_data = input_type.from_file(filename, file_options) return input_data
def main(): set_proc_title("jazzparser") ######################################################## usage = "jazzparser [<options>]" description = "The main parser interface for the Jazz Parser" ## Process the input options optparser = OptionParser(usage=usage, description=description) ### # File input options group = OptionGroup(optparser, "Input", "Input type and location") optparser.add_option_group(group) group.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.") group.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords') group.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") group.add_option("--index", "--indices", dest="input_index", action="store", help="select individual inputs to process. Specify as a comma-separated list of indices. All inputs are loaded as usual, but only the ith input is processed, for each i in the list") group.add_option("--only-load", dest="only_load", action="store_true", help="don't do anything with the inputs, just load and list them. Handy for checking the inputs load and getting their indices") group.add_option("--partitions", dest="partitions", action="store", type="int", help="divide the input data into this number of partitions and use a different set of models for each. For any parser, tagger and backoff that takes a 'model' argument, the partition number will be appended to the given value") group.add_option("--seq-parts", "--sequence-partitions", dest="sequence_partitions", action="store", help="use a chord sequence index to partition the inputs. 
Input type (bulk) must support association of the inputs with chord sequences by id. Sequences in the given sequence index file are partitioned n ways (--partitions) and the inputs are processed according to their associated sequence.") group.add_option("--continue", "--skip-done", dest="skip_done", action="store_true", help="skip any inputs for which a readable results file already exists. This is useful for continuing a bulk job that was stopped in the middle") ### group = OptionGroup(optparser, "Parser", "Parser, supertagger and backoff parser") optparser.add_option_group(group) group.add_option("-d", "--derivations", dest="derivations", action="store_true", help="keep derivation logs during parse.") group.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") # Parser options group.add_option("-p", "--parser", dest="parser", action="store", help="use the named parser algorithm instead of the default. Use '-p help' to see the list of available parsers. Default: %s" % settings.DEFAULT_PARSER, default=settings.DEFAULT_PARSER) group.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser. Type '--popt help', using '--parser <name>' to select a parser module, to get a list of options.") # Tagger options group.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER) group.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. 
Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.") # Backoff options group.add_option("-b", "--backoff", "--noparse", dest="backoff", action="store", help="use the named backoff model as a backoff if the parser produces no results") group.add_option("--bopt", "--backoff-options", "--backoff-options", "--npo", dest="backoff_opts", action="append", help="specify options for the backoff model. Type '--npo help', using '--backoff <name>' to select a backoff modules, to get a list of options.") ### # Multiprocessing options group = OptionGroup(optparser, "Multiprocessing") optparser.add_option_group(group) group.add_option("--processes", dest="processes", action="store", type="int", help="number of processes to create to perform parses in parallel. Default: 1, i.e. no process pool. Use -1 to create a process for every input", default=1) ### # Output options group = OptionGroup(optparser, "Output") optparser.add_option_group(group) group.add_option("--output", dest="output", action="store", help="directory name to output parse results to. A filename specific to the individual input will be appended to this") group.add_option("--topn", dest="topn", action="store", type="int", help="limit the number of final results to store in the output file to the top n by probability. By default, stores all") group.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.") group.add_option("-a", "--atomic-results", dest="atoms_only", action="store_true", help="only include atomic categories in the results.") group.add_option("-l", "--latex", dest="latex", action="store_true", help="output all results as Latex source. 
Used to produce a whole Latex document, but doesn't any more") group.add_option("--all-times", dest="all_times", action="store_true", help="display all timing information on semantics in output.") group.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.") group.add_option("--time", dest="time", action="store_true", help="time how long the parse takes and output with the results.") group.add_option("--no-results", dest="no_results", action="store_true", help="don't print out the parse results at the end. Obviously you'll want to make sure they're going to a file (--output). This is useful for bulk parse jobs, where the results produce a lot of unnecessary output") group.add_option("--no-progress", dest="no_progress", action="store_true", help="don't output the summary of completed sequences after each one finishes") ### # Output analysis and harmonical group = OptionGroup(optparser, "Output processing", "Output analysis and harmonical") optparser.add_option_group(group) group.add_option("--harmonical", dest="harmonical", action="store", help="use the harmonical to play the chords justly intoned according to the top result and output to a wave file.") group.add_option("--enharmonical", dest="enharmonical", action="store", help="use the harmonical to play the chords in equal temperament and output to a wave file.") group.add_option("--midi", dest="midi", action="store_true", help="generate MIDI files from the harmonical, instead of wave files.") group.add_option("--tempo", dest="tempo", action="store", type=int, help="tempo to use for the generated music (see --harmonical/--enharmonical). 
Default: 120", default=120) group.add_option("--lh-analysis", dest="lh_analysis", action="store_true", help="output the Longuet-Higgins space interpretation of the semantics for each result.") group.add_option("--lh-coordinates", dest="lh_coord", action="store_true", help="like lh-analysis, but displays the coordinates of the points instead of their names.") ### # Logging options group = OptionGroup(optparser, "Logging") optparser.add_option_group(group) group.add_option("--long-progress", dest="long_progress", action="store_true", help="print a summary of the chart so far after each chord/word has been processed.") group.add_option("--progress", "--short-progress", dest="short_progress", action="store_true", help="print a small amount of information out during parsing to indicate progress.") group.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.") ### # Shell options group = OptionGroup(optparser, "Shell", "Interactive shell for inspecting results and parser state") optparser.add_option_group(group) group.add_option("-i", "--interactive", dest="interactive", action="store_true", help="enter interactive mode after parsing.") group.add_option("--error", dest="error_shell", action="store_true", help="catch any errors, report them and then enter the interactive shell. 
This also catches keyboard interrupts, so you can use it to halt parsing and enter the shell.") # Read in command line options and args options, clinput = parse_args_with_config(optparser) ########################### Option processing #################### # Get log level option first, so we can start using the logger if options.debug: log_level = logging.DEBUG else: log_level = logging.INFO # Set up a logger init_logging(log_level) if options.latex: settings.OPTIONS.OUTPUT_LATEX = True if options.logger: # Directory parse_logger_dir = options.logger check_directory(parse_logger_dir) else: parse_logger_dir = None ######## Grammar ######## # Check the grammar actually exists grammar_names = get_grammar_names() if options.grammar is not None and options.grammar not in grammar_names: # This is not a valid grammar name logger.error("The grammar '%s' does not exist. Possible "\ "grammars are: %s." % (options.grammar, ", ".join(grammar_names))) return 1 grammar = get_grammar(options.grammar) ######## Parser ######## # Load the requested parser from jazzparser.parsers import PARSERS if options.parser.lower() == "help": print "Available parsers are: %s" % ", ".join(PARSERS) return 0 try: parser_cls = get_parser(options.parser) except ParserLoadError: logger.error("The parser '%s' could not be loaded. Possible "\ "parsers are: %s" % (options.parser, ", ".join(PARSERS))) return 1 # Get parser options if options.popts is not None: poptstr = options.popts if "help" in [s.strip().lower() for s in poptstr]: # Output this tagger's option help from jazzparser.utils.options import options_help_text print options_help_text(parser_cls.PARSER_OPTIONS, intro="Available options for selected parser") return 0 poptstr = ":".join(poptstr) else: poptstr = "" popts = ModuleOption.process_option_string(poptstr) # Check that the options are valid try: parser_cls.check_options(popts) except ModuleOptionError, err: logger.error("Problem with parser options (--popt): %s" % err) return 1
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Trains a chord labeling model using the given "\ "input data. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file." parser = OptionParser(usage=usage, description=description) parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.") parser.add_option('--opts', dest="training_opts", action="append", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.") # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") # Logging output parser.add_option('--log', dest="log", action="store", help="file to output training logs to. 
Specify a base filename; <modelname>.log will be added to the end") options, arguments = parse_args_with_config(parser) grammar = Grammar() # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif "help" in [opt.lower() for opt in options.training_opts]: print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:") sys.exit(0) else: training_opts = ModuleOption.process_option_string(options.training_opts) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names(single=False, bulk=True)) # Only partition the chord data, not the MIDI data if options.partitions is not None and not \ (isinstance(input_data, MidiTaggerTrainingBulkInput) and \ input_data.chords is not None): print >>sys.stderr, "Can only partition chord data and no chord data "\ "was supplied" sys.exit(1) if options.partitions: # The input includes chord training data parts = input_data.chords.get_partitions(options.partitions)[1] models = [("%s%d" % (model_name,num),chord_data) \ for num,chord_data in enumerate(parts)] else: models = [(model_name,None)] for part_name,chord_data in models: if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Create a fresh model with this name model = HPChordLabeler.train(input_data, part_name, logger=logger, options=training_opts, chord_data=chord_data) print "Trained model %s" % (part_name)
def main(): usage = "%prog [<options>]" description = "Runs a supertagger from the Jazz Parser to tag some input "\ "but just outputs the results, rather than continuing to parse." optparser = OptionParser(usage=usage, description=description) # Tagger options optparser.add_option( "-t", "--tagger", "--supertagger", dest="supertagger", action="store", help= "run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER) optparser.add_option( "--topt", "--tagger-options", dest="topts", action="append", help= "specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options." ) # Commonly-used misc optparser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") # File input options optparser.add_option( "--file", "-f", dest="file", action="store", help= "use a file to get parser input from. Use --filetype to specify the type of the file." ) optparser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords') optparser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." 
) # Misc options optparser.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.") optparser.add_option( "-i", "--interactive", dest="interactive", action="store_true", help= "instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging" ) # Logging options optparser.add_option( "--logger", dest="logger", action="store", help= "directory to put parser logging in. A filename based on an identifier for each individual input will be appended." ) # Read in command line options and args options, clinput = parse_args_with_config(optparser) ########################### Option processing #################### if options.logger: # Directory parse_logger_dir = options.logger check_directory(parse_logger_dir) else: parse_logger_dir = None ######## Grammar ######## # Read in the grammar grammar = get_grammar(options.grammar) ######## Supertagger ######## # Load the supertagger requested if options.supertagger.lower() == "help": print "Available taggers are: %s" % ", ".join(TAGGERS) return 0 try: tagger_cls = get_tagger(options.supertagger) except TaggerLoadError: logger.error("The tagger '%s' could not be loaded. Possible "\ "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS))) return 1 # Get supertagger options before initializing the tagger if options.topts is not None: toptstr = options.topts if "help" in [s.strip().lower() for s in toptstr]: # Output this tagger's option help from jazzparser.utils.options import options_help_text print options_help_text( tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger") return 0 toptstr = ":".join(toptstr) else: toptstr = "" topts = ModuleOption.process_option_string(toptstr) # Check that the options are valid try: tagger_cls.check_options(topts) except ModuleOptionError, err: print "Problem with tagger options (--topt): %s" % err return 1
def main(): usage = "%prog [<options>] <model-name> <training-input>" description = "Training of PCFG models." parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions to divide the data into. "\ "For train, divides the input file, trains a model on each "\ "partition's complement and appends partition number to "\ "the model names. For del, appends partition numbers to model "\ "names and deletes all the models. Recache does similarly. "\ "Has no effect for parse.") parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options" ) parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) if options.debug: log_level = logging.DEBUG else: log_level = logging.WARN # Create a logger for training logger = create_logger(log_level=log_level, name="training", stderr=True) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel # Parse the option string if options.training_opts is None: opts = {} elif options.training_opts.lower() == "help": print options_help_text(PcfgModel.TRAINING_OPTIONS, intro="Training options for PCFGs") sys.exit(0) else: opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), PcfgModel.TRAINING_OPTIONS) if len(arguments) == 0: print >> sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >> sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, 
"%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] if len(arguments) < 2: print >> sys.stderr, "Specify an input file to read sequence data from" sys.exit(1) # Read in the training data from the given file seqs = SequenceIndex.from_file(arguments[1]) if options.partitions is not None: # Prepare each training partition datasets = holdout_partition(seqs.sequences, options.partitions) else: datasets = [seqs.sequences] for dataset, (parti, part_model) in zip(datasets, parts): # Train the named model on the sequence data model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, logger=logger) model.save() print "Trained model", part_model
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a backoff builder model using the given "\ "input data. Specify a model type (ngram, etc) and a name to "\ "identify it. The data file should be a stored SequenceIndex file." parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." 
) options, arguments = parse_args_with_config(parser) if len(arguments) < 3: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_type = arguments[0] model_name = arguments[1] builder_cls = get_backoff_builder(model_type) model_cls = builder_cls.MODEL_CLASS # Load the sequence data from a dbinput file input_data = command_line_input( filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=['bulk-db', 'bulk-db-annotated']) # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) if options.partitions is not None: parts = holdout_partition(input_data, options.partitions) models = [(builder_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name, input_data)] for part_name, seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) # Train it with the loaded data model.train(seqs) model.save() print "Trained model %s" % (part_name)
else: for m in formalism.semantics_distance_metrics: if m.name == options.metric: metric_cls = m break else: # No metric found matching this name print "No metric '%s'" % options.metric sys.exit(1) print >>sys.stderr, "Using distance metric: %s" % metric_cls.name # Now process the metric options if options.mopts is not None: moptstr = options.mopts if "help" in [s.strip().lower() for s in moptstr]: # Output this parser's option help print options_help_text(metric_cls.OPTIONS, intro="Available options for metric '%s'" % metric_cls.name) sys.exit(0) moptstr = ":".join(moptstr) else: moptstr = "" mopts = ModuleOption.process_option_string(moptstr) # Instantiate the metric with these options metric = metric_cls(options=mopts) if len(arguments) < 2: print >>sys.stderr, "Specify a song corpus name and one or more files to read results from" sys.exit(1) # First argument is an TonalSpaceAnalysisSet corpus_name = arguments[0]
for m in formalism.semantics_distance_metrics: if m.name == options.metric: metric_cls = m break else: # No metric found matching this name print "No metric '%s'" % options.metric sys.exit(1) print >> sys.stderr, "Using distance metric: %s" % metric_cls.name # Now process the metric options if options.mopts is not None: moptstr = options.mopts if "help" in [s.strip().lower() for s in moptstr]: # Output this parser's option help print options_help_text(metric_cls.OPTIONS, intro="Available options for metric '%s'" % metric_cls.name) sys.exit(0) moptstr = ":".join(moptstr) else: moptstr = "" mopts = ModuleOption.process_option_string(moptstr) # Instantiate the metric with these options metric = metric_cls(options=mopts) if len(arguments) < 2: print >> sys.stderr, "Specify a song corpus name and one or more files to read results from" sys.exit(1) # First argument is an TonalSpaceAnalysisSet corpus_name = arguments[0]
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Loads a chord labeling model and uses it to assign chord "\ "labels to the given MIDI file." parser = OptionParser(usage=usage, description=description) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='segmidi') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." ) # Labeling options parser.add_option( "--labeler-options", "--lopt", dest="labeler_options", action="append", help= "options for the labeler. Type '--lopt help' for a list of available options." ) parser.add_option( "--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)") # Output options parser.add_option( "--single", "-1", dest="single", action="store_true", help= "show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)" ) parser.add_option( '-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input" ) parser.add_option( '--chords-only', dest="chords_only", action="store_true", help= "only realize the chords: don't overlay on the input midi (only works with -r)" ) options, arguments = parse_args_with_config(parser) if options.labeler_options is not None and "help" in options.labeler_options: print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler") sys.exit(0) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input "\ "(MIDI) data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Process the labeler options lopt_dict = 
ModuleOption.process_option_string(options.labeler_options) if options.single: # No point in getting more than one label, since we only display one lopt_dict['n'] = 1 if options.no_key: # Just set the nokey option lopt_dict['nokey'] = True # Check they're valid before doing anything else HPChordLabeler.process_labeling_options(lopt_dict) input_data = command_line_input(filename, filetype=options.filetype, options=options.file_options, allowed_types=['segmidi', 'bulk-segmidi']) bulk = not is_bulk_type(type(input_data)) if bulk: input_data = [input_data] for i, data in enumerate(input_data): input_stream = data.stream print "Read midi data in %d segments" % len(data) # Load the model model = HPChordLabeler.load_model(model_name) # Perform labeling labels = model.label(data, options=lopt_dict) # Try labeling as it will be passed to the tagger labs = model.label_lattice(data, options=lopt_dict) if options.single: # Special output for single label output print ", ".join(["%s" % timelabs[0][0] for timelabs in labels]) else: # Print out the labels for each timestep for time, timelabs in enumerate(labels): print "%d: %s" % (time, ", ".join([ "%s (%.2e)" % (label, prob) for (label, prob) in timelabs ])) if options.realize is not None: # Get the single best chord label for each time best_labels = [timelabs[0][0] for timelabs in labels] # Realize as a midi file print "Realizing output chord sequence" real = ChordSequenceRealizer(best_labels, model.chord_vocab, resolution=input_stream.resolution, chord_length=data.time_unit, text_events=True) if options.chords_only: # Don't overlay stream = real.generate(offset=data.tick_offset) else: stream = real.generate(overlay=input_stream, offset=data.tick_offset) if bulk: filename = "%s-%d" % (options.realize, i) else: filename = options.realize write_midifile(stream, filename)
def main(): usage = "%prog [options] <song-set> <results-file0> [<results-file1> ...]" parser = OptionParser(usage=usage) parser.add_option( "--popt", "--parser-options", dest="popts", action="append", help= "specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)" ) parser.add_option( "-m", "--metric", dest="metric", action="store", help= "semantics distance metric to use. Use '-m help' for a list of available metrics" ) parser.add_option( "--mopt", "--metric-options", dest="mopts", action="append", help= "options to pass to the semantics metric. Use with '--mopt help' with -m to see available options" ) parser.add_option( "-r", "--print-results", dest="print_results", action="store", default=5, type="int", help= "number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus" ) parser.add_option( "-g", "--gold-only", dest="gold_only", action="store_true", help= "skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)" ) parser.add_option( "--mc", "--metric-computation", dest="metric_computation", action="store_true", help= "output the computation information for the metric between the parse result and each top search result" ) options, arguments = parser.parse_args() # For now, we always use the music_halfspan formalism with this script # If we wanted to make it generic, we'd just load the formalism according # to a command-line option formalism = Formalism # Process parser options if options.popts is not None: poptstr = options.popts if "help" in [s.strip().lower() for s in poptstr]: # Output this parser's option help print options_help_text( DirectedCkyParser.PARSER_OPTIONS, intro="Available options for gold standard interpreter") sys.exit(0) poptstr = ":".join(poptstr) else: poptstr = "" popts = 
ModuleOption.process_option_string(poptstr) # Check that the options are valid try: DirectedCkyParser.check_options(popts) except ModuleOptionError, err: logger.error("Problem with parser options (--popt): %s" % err) sys.exit(1)
def main():
    """Command-line entry point: parse sequences from a sequence index file.

    Uses the annotations stored in the file to drive a DirectedCkyParser,
    reporting full/partial/failed parse statistics and optionally printing
    derivation traces and tonal-space paths.
    """
    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--popt", "--parser-options", dest="popts",
        action="append",
        help="specify options for the parser. Type '--popt help' to get a list of options (we use a DirectedCkyParser)")
    parser.add_option("--derivations", "--deriv", dest="derivations",
        action="store_true",
        help="print out derivation traces of all the results")
    parser.add_option("--index", "-i", dest="index", action="store",
        type="int", help="parse just the sequence with this index")
    parser.add_option("--quiet", "-q", dest="quiet", action="store_true",
        help="show only errors in the output")
    parser.add_option("--tonal-space", "--ts", dest="tonal_space",
        action="store_true",
        help="show the tonal space path (with -q, shows only paths)")
    parser.add_option("--output-set", "-o", dest="output_set", action="store",
        help="store the analyses to a tonal space analysis set with this name")
    parser.add_option("--trace-parse", "-t", dest="trace_parse",
        action="store_true",
        help="output a trace of the shift-reduce parser's operations in producing the full interpretation from the annotations")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            print options_help_text(DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for the directed parser")
            return 0
        # NOTE(review): poptstr is still a list here (--popt is
        # action="append"); sibling scripts do poptstr = ":".join(poptstr)
        # before process_option_string -- confirm the join isn't needed
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)

    grammar = get_grammar()
    # Quiet mode suppresses everything below ERROR level
    if options.quiet:
        logger = create_plain_stderr_logger(log_level=logging.ERROR)
    else:
        logger = create_plain_stderr_logger()
    # Route the shift-reduce parser's trace to the main logger if requested
    if options.trace_parse:
        parse_logger = logger
    else:
        parse_logger = None

    seq_index = SequenceIndex.from_file(arguments[0])
    # Get the chord sequence(s)
    if options.index is None:
        seqs = seq_index.sequences
    else:
        seqs = [seq_index.sequence_by_index(options.index)]
    logger.info("%d sequences\n" % len(seqs))

    # Collected (name, semantics) pairs for fully-parsed sequences;
    # NOTE(review): full_analyses and --output-set appear unused in this
    # view -- presumably consumed further down (possibly truncated here)
    full_analyses = []
    stats = {
        'full': 0,
        'partial': 0,
        'fail': 0,
    }
    # Try parsing every sequence
    for seq in seqs:
        logger.info("====== Sequence %s =======" % seq.string_name)
        try:
            results = parse_sequence_with_annotations(seq, grammar,
                logger=logger, parse_logger=parse_logger)
        except ParseError, err:
            logger.error("Error parsing: %s" % err)
            stats['fail'] += 1
        else:
            # This may have resulted in multiple partial parses
            logger.info("%d partial parses" % len(results))
            if len(results) == 1:
                stats['full'] += 1
            else:
                stats['partial'] += 1
            if options.derivations:
                # Output the derivation trace for each partial parse
                for result in results:
                    print
                    print result.derivation_trace
            if options.tonal_space:
                # Output the tonal space coordinates
                path = grammar.formalism.sign_to_coordinates(results[0])
                for i, point in enumerate(path):
                    print "%d, %d: %s" % (seq.id, i, point)
            # Only include a result in the output analyses if it was a full parse
            if len(results) == 1:
                full_analyses.append((seq.string_name, results[0].semantics))
            else:
                logger.warn("%s was not included in the output analyses, "\
                    "since it was not fully parsed" % seq.string_name)
def main():
    """Command-line entry point: run a supertagger and output its tags.

    Unlike the full parser, this stops after tagging: it loads the grammar
    and the selected tagger class, then validates tagger options (--topt).
    Returns 0 on help output, 1 on option/loading errors.
    """
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option("-t", "--tagger", "--supertagger",
        dest="supertagger", action="store",
        help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER,
        default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option("--topt", "--tagger-options", dest="topts",
        action="append",
        help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Commonly-used misc
    optparser.add_option("-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option("--file", "-f", dest="file", action="store",
        help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    optparser.add_option("--filetype", "--ft", dest="filetype",
        action="store",
        help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords",
        default='chords')
    optparser.add_option("--file-options", "--fopt", dest="file_options",
        action="store",
        help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Misc options
    optparser.add_option("-v", "--debug", dest="debug", action="store_true",
        help="output verbose debugging information.")
    optparser.add_option("-i", "--interactive", dest="interactive",
        action="store_true",
        help="instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging")
    # Logging options
    optparser.add_option("--logger", dest="logger", action="store",
        help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")

    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger")
            return 0
        # --topt may be repeated: join into one colon-separated string
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
def main():
    """Command-line entry point: train RaphSto chord-labelling HMM models.

    Reads a list of midi training files, optionally truncates/splits/filters
    the emission sequences, then trains one model (or one per holdout
    partition with -p) using the model type's Baum-Welch trainer.
    Exits the process on argument or option errors.
    """
    usage = "%prog [options] <model_name> <input-file>"
    description = (
        "Trains a model for the RaphSto chord labelling "
        "algorithm on a file that contains a list of midi files with "
        "training options"
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions",
        action="store", type="int",
        help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option("--opts", dest="opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    parser.add_option("--proc", "--processes", dest="processes",
        action="store", type="int",
        help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)",
        default=1)
    parser.add_option("--max-length", dest="max_length",
        action="store", type="int",
        help="limits the length of the training midi sequences in chunks")
    parser.add_option("--split-length", dest="split_length",
        action="store", type="int",
        help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot")
    parser.add_option("--min-length", dest="min_length",
        action="store", type="int",
        help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence")
    parser.add_option("--progress-out", dest="progress_out", action="store",
        help="output logging info to a file instead of the command line")
    parser.add_option("--init-model", dest="init_model", action="store",
        help="initialize the model using parameters from an already trained model")
    parser.add_option("--init-ctrans", dest="init_ctrans", action="store",
        help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability")
    parser.add_option("--chord-set", dest="chord_set", action="store",
        help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used")
    parser.add_option("-m", "--model-type", dest="model_type", action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()),
        default="standard")
    options, arguments = parse_args_with_config(parser)

    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS,
            intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    # NOTE(review): options.opts may be None here; assumes
    # process_option_string accepts None -- confirm
    opts = ModuleOption.process_option_string(options.opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    print >> sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(" ")

    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >> sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >> sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >> sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)

    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i, mh in enumerate(handlers):
        logger.info("%s: %s" % (i, midis.inputs[i][0]))
        emissions = mh.get_emission_stream()[0]
        # --max-length: throw away everything past the limit
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % (i, options.max_length, len(emissions)))
            emissions = emissions[: options.max_length]
        # --split-length: keep the overflow, but as separate sequences
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer "
                "than %d chunks" % (i, options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[: options.split_length])
                emissions = emissions[options.split_length :]
        training_data.append(emissions)

    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info(
                "Threw away %d short sequences (below %d chunks)"
                % ((before_chuck - len(training_data)), options.min_length)
            )

    logger.info(
        "Training on %d sequences. Lengths: %s"
        % (len(training_data), ", ".join(str(len(seq)) for seq in training_data))
    )

    # With -p, train one model per holdout partition complement
    if options.partitions is not None:
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)]
    else:
        models = [(model_name, training_data)]

    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes

    for part_name, data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: "
                "%s, %s, %s" % ctype_params)
            init_kwargs = {"model_name": part_name}
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs["chord_set"] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)
        # Initialize the chord transition probabilities if given
        if options.init_ctrans is not None:
            logger.info("Initializing chord transition distribution to %s" % options.init_ctrans)
            model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >> sys.stderr, "Training terminating at %s" % datetime.now().isoformat(" ")
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a supertagging model using the given "\ "input data. Specify a model type (baseline1, etc) and a name to "\ "identify it. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file. "\ "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS) parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." ) # Logging output parser.add_option( '--log', dest="log", action="store", help= "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end" ) options, arguments = parse_args_with_config(parser) grammar = Grammar() # Get the model type first: we might not need the other args if len(arguments) == 0: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" model_type = arguments[0] if model_type not in TRAINABLE_MODELS: print >>sys.stderr, "'%s' is not a valid model type. 
Available taggers are: %s" % \ (model_type, ", ".join(TRAINABLE_MODELS)) sys.exit(1) if model_type not in TAGGERS: print >>sys.stderr, "'%s' isn't a registered model type. Check that "\ "the name in TRAINABLE_MODELS is correct" % model_type sys.exit(1) tagger_cls = get_tagger(model_type) if not issubclass(tagger_cls, ModelTagger): print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % ( tagger_cls.__name__) sys.exit(1) model_cls = tagger_cls.MODEL_CLASS # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) # Get the rest of the args if len(arguments) < 3: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_name = arguments[1] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names( single=False, bulk=True)) if options.partitions is not None and options.partitions > 1: parts = input_data.get_partitions(options.partitions)[1] models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name, input_data)] for part_name, seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Train the model with the 
loaded data model.train(seqs, logger=logger) model.save() print "Trained model %s" % (part_name)
def command_line_input(filename=None, filetype=None, options="", \ allowed_types=None, default_type=None): """ Utility function for processing file input options from the command line. Pass in as args the values straight from the command line options to select a filename, filetype and list of options. Typical command-line options for this purpose (for an optparse option parser C{op}):: op.add_option("--file", "-f", dest="file", action="store", help="use a file to get input from") op.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Use '--filetype help' for a list of available types") op.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Use '--fopt help', with '--ft <type>', for a list of available options") Then you can call this function as:: command_line_input(filename=options.file, filetype=options.filetype, options=options.file_options) @type allowed_types: list of strs @param allowed_types: types of input you want the user to be able to give. If not given, all types are allowed @type default_type: str @param default_type: filetype to assume if no other filetype is given @rtype: L{InputReader} subclass @return: the input wrapper of appropriate type, or None if no input file was given """ if allowed_types is None: allowed_types = get_input_type_names() if filetype is None and default_type is not None: filetype = default_type # Catch a request for filetype help if filetype is not None and filetype.lower() == "help": # Output possible file types print "Allowed input types: %s" % ", ".join(allowed_types) sys.exit(0) # Check that the filetype is valid and get the input type class if it is input_type = get_input_type(filetype) type_name = input_type_name(input_type) if input_type is None: raise InputTypeError, "Unknown filetype '%s'. 
Allowed types are: %s" % \ (filetype, ", ".join(allowed_types)) if type_name not in allowed_types: raise InputTypeError, "Cannot accept input of type '%s'. Allowed "\ "types are: %s" % (filetype, ", ".join(allowed_types)) if options is not None and options.lower() == "help": # Output help text from jazzparser.utils.options import options_help_text print options_help_text(input_type.FILE_INPUT_OPTIONS, intro="Available options for input type %s" % type_name) sys.exit(0) if filename is None: return None # First get a dict of the options file_options = ModuleOption.process_option_string(options) # Process the options as appropriate for this type file_options = input_type.process_option_dict(file_options) # Instantiate the input from the file as appropriate for the input type input_data = input_type.from_file(filename, file_options) return input_data
def run(self, args, state):
    """Shell command: rank the loaded songset by semantics distance.

    Compares the semantics of one parse result (or an averaged window of
    results, with the 'average' option) against every song in the loaded
    songset using the selected distance metric, and prints the songs in
    order of increasing distance.
    """
    from jazzparser.formalisms.music_halfspan.evaluation import \
        tonal_space_local_alignment, tonal_space_distance
    from jazzparser.formalisms.music_halfspan import Formalism

    metric_name = self.options['metric']
    if metric_name == "help":
        # Print a list of available metrics
        print ", ".join([metric.name for metric in Formalism.semantics_distance_metrics])
        return

    # Which parse result to start from (default: the first)
    if len(args) == 0:
        resnum = 0
    else:
        resnum = int(args[0])

    if self.options['average'] and self.options['average'] > 1:
        # Average the distance over several results
        resnums = range(resnum, resnum + self.options['average'])
    else:
        # Just a single result
        resnums = [resnum]

    resultsems = []
    for resnum in resnums:
        # Get the result semantics that we're going to try to match
        if resnum >= len(state.results):
            raise ShellError, "No result number %d" % resnum
        result = state.results[resnum]
        resultsems.append(result.semantics)

    # Get the loaded songset containing the song corpus
    songset = state.get_data("songset",
        help_msg="Use command 'loadsongs' to load a songset")

    # Load the appropriate metric
    if metric_name is None:
        # Use the first in the list as default
        metric_cls = Formalism.semantics_distance_metrics[0]
    else:
        for m in Formalism.semantics_distance_metrics:
            if m.name == metric_name:
                metric_cls = m
                break
        else:
            # No metric found matching this name
            print "No metric '%s'" % metric_name
            # NOTE(review): sys.exit in an interactive shell command kills
            # the whole shell; the result-lookup above raises ShellError
            # instead -- confirm this exit is intentional
            sys.exit(1)
    print "Using distance metric: %s\n" % metric_cls.name

    # Now process the metric options
    moptstr = self.options['mopts']
    if moptstr is not None:
        if moptstr == "help":
            # Output this metric's option help
            print options_help_text(metric_cls.OPTIONS,
                intro="Available options for metric '%s'" % metric_cls.name)
            return
    else:
        moptstr = ""
    mopts = ModuleOption.process_option_string(moptstr)
    # Instantiate the metric with these options
    metric = metric_cls(options=mopts)

    song_distances = {}
    # Try matching against each song
    for resultsem in resultsems:
        for name, song in songset.analyses:
            distance = metric.distance(resultsem, song)
            song_distances.setdefault(name, []).append(distance)
    # Average the scores
    distances = []
    for name, costs in song_distances.items():
        ave_cost = sum(costs) / float(len(costs))
        distances.append((ave_cost, name))
    # Sort so the closest ones come first
    distances.sort(key=lambda x: x[0])
    # Output all the songs, ordered by similarity, with their distance
    for i, (distance, name) in enumerate(distances):
        print "%d> %s (%s)" % (i, name, distance)
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a supertagging model using the given "\ "input data. Specify a model type (baseline1, etc) and a name to "\ "identify it. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file. "\ "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS) parser = OptionParser(usage=usage, description=description) parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.") parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.") # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") # Logging output parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end") options, arguments = parse_args_with_config(parser) grammar = Grammar() # Get the model type first: we might not need the other args if len(arguments) == 0: print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments" model_type = arguments[0] if model_type not in TRAINABLE_MODELS: print >>sys.stderr, "'%s' is not a valid model type. 
Available taggers are: %s" % \ (model_type, ", ".join(TRAINABLE_MODELS)) sys.exit(1) if model_type not in TAGGERS: print >>sys.stderr, "'%s' isn't a registered model type. Check that "\ "the name in TRAINABLE_MODELS is correct" % model_type sys.exit(1) tagger_cls = get_tagger(model_type) if not issubclass(tagger_cls, ModelTagger): print >>sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (tagger_cls.__name__) sys.exit(1) model_cls = tagger_cls.MODEL_CLASS # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) # Get the rest of the args if len(arguments) < 3: print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_name = arguments[1] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names(single=False, bulk=True)) if options.partitions is not None and options.partitions > 1: parts = input_data.get_partitions(options.partitions)[1] models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name,input_data)] for part_name,seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Train the model with the loaded 
data model.train(seqs, logger=logger) model.save() print "Trained model %s" % (part_name)