예제 #1
0
def main():
    usage = "%prog [options] <chord-corpus-file> <chord-labeling-model> <midi-file>"
    description = "Like findsong, but searches by chord label sequence "\
        "similarity. The input is not a results file, but a midi file, or "\
        "a midi bulk input (CSV)."
    parser = OptionParser(usage=usage)
    parser.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)")
    parser.add_option("-r", "--print-results", dest="print_results", action="store", default=5, type="int", help="number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus")
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", default="bulk-segmidi", help="filetype to read in. Use 'segmidi' to read a single midi file, or 'bulk-segmidi' (default) to read many from a CSV")
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="store", help="options for the labeler. Type '--lopt help' for a list of available options.")
    parser.add_option("-g", "--gold-only", dest="gold_only", action="store_true", help="skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)")
    parser.add_option("--align", "--print-alignment", dest="print_alignment", action="store_true", help="print out the full alignment between the labeling and the top match")
    options, arguments = parser.parse_args()
    
    # Process parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this parser's option help
            print options_help_text(DirectedCkyParser.PARSER_OPTIONS, intro="Available options for gold standard interpreter")
            sys.exit(0)
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        DirectedCkyParser.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        sys.exit(1)
예제 #2
0
def main():
    usage = "%prog [options] <song-set> <results-file0> [<results-file1> ...]"
    parser = OptionParser(usage=usage)
    parser.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)")
    parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    parser.add_option("-r", "--print-results", dest="print_results", action="store", default=5, type="int", help="number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus")
    parser.add_option("-g", "--gold-only", dest="gold_only", action="store_true", help="skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)")
    parser.add_option("--mc", "--metric-computation", dest="metric_computation", action="store_true", help="output the computation information for the metric between the parse result and each top search result")
    options, arguments = parser.parse_args()
    
    # For now, we always use the music_halfspan formalism with this script
    # If we wanted to make it generic, we'd just load the formalism according 
    #  to a command-line option
    formalism = Formalism
    
    # Process parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this parser's option help
            print options_help_text(DirectedCkyParser.PARSER_OPTIONS, intro="Available options for gold standard interpreter")
            sys.exit(0)
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        DirectedCkyParser.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        sys.exit(1)
예제 #3
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    options, arguments = parse_args_with_config(parser)
    
    if len(arguments) < 3:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]
    
    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS
    
    # Load the sequence data from a dbinput file
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=['bulk-db', 'bulk-db-annotated'])
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts), 
                            model_cls.TRAINING_OPTIONS)
        
    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name,input_data)]
    
    for part_name,seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
예제 #4
0
 def cl_output_options(cls, string):
     """
     Convenience method so you don't have to do this lots of times over.
     
     Take a string of output options from the command line and set the 
     output options from it.
     
     Should only be used in command-line scripts.
     
     """
     if string is not None and string.lower() == "help":
         print "Available output options"
         print "========================"
         print options_help_text(cls.output_options)
         sys.exit(0)
     optdict = ModuleOption.process_option_string(string)
     cls.process_output_options(optdict)
예제 #5
0
 def cl_output_options(cls, string):
     """
     Convenience method so you don't have to do this lots of times over.
     
     Take a string of output options from the command line and set the 
     output options from it.
     
     Should only be used in command-line scripts.
     
     """
     if string is not None and string.lower() == "help":
         print "Available output options"
         print "========================"
         print options_help_text(cls.output_options)
         sys.exit(0)
     optdict = ModuleOption.process_option_string(string)
     cls.process_output_options(optdict)
예제 #6
0
def main():
    usage = "%prog [options] <model_name> <input-file>"
    description = "Trains a model for the RaphSto chord labelling "\
        "algorithm on a file that contains a list of midi files with "\
        "training options"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    parser.add_option('--proc', '--processes', dest="processes", action="store", type="int", help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)", default=1)
    parser.add_option('--max-length', dest="max_length", action="store", type="int", help="limits the length of the training midi sequences in chunks")
    parser.add_option('--split-length', dest="split_length", action="store", type="int", help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot")
    parser.add_option('--min-length', dest="min_length", action="store", type="int", help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence")
    parser.add_option('--progress-out', dest="progress_out", action="store", help="output logging info to a file instead of the command line")
    parser.add_option('--init-model', dest="init_model", action="store", help="initialize the model using parameters from an already trained model")
    parser.add_option('--init-ctrans', dest="init_ctrans", action="store", help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability")
    parser.add_option('--chord-set', dest="chord_set", action="store", help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used")
    parser.add_option('-m', '--model-type', dest="model_type", action="store", help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()), default="standard")
    options, arguments = parse_args_with_config(parser)
    
    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)
    
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    print >>sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(' ')
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >>sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >>sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")
        
    if options.model_type not in MODEL_TYPES:
        print >>sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]
    
    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >>sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)
    
    
    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i,mh in enumerate(handlers):
        logger.info("%s: %s" % (i,midis.inputs[i][0]))
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % \
                                    (i,options.max_length,len(emissions)))
            emissions = emissions[:options.max_length]
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer "\
                                "than %d chunks" % (i,options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[:options.split_length])
                emissions = emissions[options.split_length:]
        training_data.append(emissions)
    
    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info("Threw away %d short sequences (below %d chunks)" % \
                    ((before_chuck-len(training_data)), options.min_length))
    
    logger.info("Training on %d sequences. Lengths: %s" % \
                    (len(training_data), 
                     ", ".join(str(len(seq)) for seq in training_data)))
    
    if options.partitions is not None:
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name,num),data) for num,data in enumerate(parts)]
    else:
        models = [(model_name,training_data)]
        
    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes
    
    for part_name,data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % \
                options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, \
                model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: "\
                "%s, %s, %s" % ctype_params)
            init_kwargs = { 'model_name' : part_name }
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs['chord_set'] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)
            
            # Initialize the chord transition probabilities if given
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s" \
                    % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >>sys.stderr, "Training terminating at %s" % datetime.now().isoformat(' ')
예제 #7
0
     print "Available taggers are: %s" % ", ".join(TAGGERS)
     return 0
 try:
     tagger_cls = get_tagger(options.supertagger)
 except TaggerLoadError:
     logger.error("The tagger '%s' could not be loaded. Possible "\
         "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
     return 1
     
 # Get supertagger options before initializing the tagger
 if options.topts is not None:
     toptstr = options.topts
     if "help" in [s.strip().lower() for s in toptstr]:
         # Output this tagger's option help
         from jazzparser.utils.options import options_help_text
         print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger")
         return 0
     toptstr = ":".join(toptstr)
 else:
     toptstr = ""
 topts = ModuleOption.process_option_string(toptstr)
 # Check that the options are valid
 try:
     tagger_cls.check_options(topts)
 except ModuleOptionError, err:
     logger.error("Problem with tagger options (--topt): %s" % err)
     return 1
 
 ######## Backoff ########
 # Load the requested backoff model, if any
 if options.backoff is not None:
예제 #8
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='segmidi')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Labeling options
    parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="append", help="options for the labeler. Type '--lopt help' for a list of available options.")
    parser.add_option("--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option("--single", "-1", dest="single", action="store_true", help="show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)")
    parser.add_option('-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input")
    parser.add_option('--chords-only', dest="chords_only", action="store_true", help="only realize the chords: don't overlay on the input midi (only works with -r)")
    options, arguments = parse_args_with_config(parser)
    
    if options.labeler_options is not None and "help" in options.labeler_options:
        print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler")
        sys.exit(0)
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input "\
            "(MIDI) data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True
    
    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)
    
    input_data = command_line_input(filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=['segmidi','bulk-segmidi'])
    bulk = not is_bulk_type(type(input_data))
    if bulk:
        input_data = [input_data]
        
    for i,data in enumerate(input_data):
        input_stream = data.stream
        print "Read midi data in %d segments" % len(data)
        
        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger
        labs = model.label_lattice(data, options=lopt_dict)
        
        if options.single:
            # Special output for single label output
            print ", ".join(["%s" % timelabs[0][0] for timelabs in labels])
        else:
            # Print out the labels for each timestep
            for time,timelabs in enumerate(labels):
                print "%d: %s" % (time, 
                    ", ".join(["%s (%.2e)" % (label,prob) for (label,prob) in timelabs]))
        
        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print "Realizing output chord sequence"
            real = ChordSequenceRealizer(best_labels, 
                                         model.chord_vocab, 
                                         resolution=input_stream.resolution, 
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream, offset=data.tick_offset)
                
            if bulk:
                filename = "%s-%d" % (options.realize, i)
            else:
                filename = options.realize
            write_midifile(stream, filename)
예제 #9
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options")
    parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr")
    parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)
    
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level = log_level,
                  name = "training",
                  stderr = True)
    
    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
        
    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS, 
                                            intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
                    ModuleOption.process_option_string(options.training_opts),
                    PcfgModel.TRAINING_OPTIONS)
    
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >>sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name
    
    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]
    
    if len(arguments) < 2:
        print >>sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])
    
    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]
        
    for dataset,(parti,part_model) in zip(datasets,parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, 
                                logger=logger)
        model.save()
        print "Trained model", part_model
예제 #10
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="append",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(
            options.training_opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)

    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name, None)]

    for part_name, chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
예제 #11
0
def command_line_metric(formalism, metric_name=None, options=""):
    """
    Load a semantics distance metric selected on the command line, 
    instantiate it with the user's options and return it. Help requests 
    (for the metric name or for the metric options) are answered by printing 
    the relevant information and exiting.
    
    Typical options::
      parser.add_option("-m", "--metric", dest="metric", action="store", 
          help="semantics distance metric to use. Use '-m help' for a list of available metrics")
      parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", 
          help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    
    You could then call this as::
      metric = command_line_metric(formalism, options.metric, options.mopts)
    
    @param formalism: object providing C{semantics_distance_metrics}, the 
        sequence of metric classes to choose from
    @type metric_name: str
    @param metric_name: name of the metric to use, "help" to list the 
        available metrics and exit, or None to use the first metric
    @type options: str or list of strs
    @param options: option string(s) for the metric. A list is joined with 
        ":". If any value is "help", the metric's options are printed and 
        the program exits
    @return: the metric instantiated with given options
    
    """
    import sys
    from jazzparser.utils.options import ModuleOption, options_help_text

    available = formalism.semantics_distance_metrics
    # Without any metrics there is nothing to select, so give up straight away
    if len(available) == 0:
        print "ERROR: the formalism defines no distance metrics, so this "\
            "script won't work"
        sys.exit(1)

    # A request for the list of metric names
    if metric_name == "help":
        print "Available distance metrics:"
        print ", ".join([metric.name for metric in available])
        sys.exit(0)

    if metric_name is None:
        # No name given: the first metric is the default
        metric_cls = available[0]
    else:
        # Take the first metric whose name matches
        matching = [m for m in available if m.name == metric_name]
        if not matching:
            print "No metric '%s'" % metric_name
            sys.exit(1)
        metric_cls = matching[0]

    # A single string is treated like a one-item list (an "append" option 
    #  action gives us a list already)
    if isinstance(options, str):
        options = [options]

    if options is None:
        moptstr = ""
    else:
        if "help" in [s.strip().lower() for s in options]:
            # Show what options the selected metric accepts
            print options_help_text(metric_cls.OPTIONS,
                                    intro="Available options for metric '%s'" %
                                    metric_cls.name)
            sys.exit(0)
        moptstr = ":".join(options)

    mopts = ModuleOption.process_option_string(moptstr)
    # Build the metric itself from the processed options
    return metric_cls(options=mopts)
def command_line_input(filename=None, filetype=None, options="", allowed_types=None, default_type=None):
    """
    Utility function for processing file input options from the command line.
    Pass in as args the values straight from the command line options to 
    select a filename, filetype and list of options.
    
    Typical command-line options for this purpose (for an optparse option parser C{op})::
     op.add_option("--file", "-f", dest="file", action="store", help="use a file to get input from")
     op.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Use '--filetype help' for a list of available types")
     op.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Use '--fopt help', with '--ft <type>', for a list of available options")
    Then you can call this function as::
     command_line_input(filename=options.file, filetype=options.filetype, options=options.file_options)
    
    A filetype of "help" prints the allowed types and exits; an options 
    value of "help" prints the selected type's options and exits. Both are 
    handled (and the filetype is validated) before the filename is looked 
    at, so they work even when no input file is given.
    
    @type filename: str
    @param filename: file to read the input from
    @type filetype: str
    @param filetype: name of the input type to read the file as
    @type options: str
    @param options: option string for the input type
    @type allowed_types: list of strs
    @param allowed_types: types of input you want the user to be able to give.
        If not given, all types are allowed
    @type default_type: str
    @param default_type: filetype to assume if no other filetype is given
    @rtype: L{InputReader} subclass
    @return: the input wrapper of appropriate type, or None if no input file 
        was given
    @raise InputTypeError: if the filetype is unknown or is not one of the 
        allowed types
    
    """
    if allowed_types is None:
        allowed_types = get_input_type_names()

    if filetype is None and default_type is not None:
        filetype = default_type

    # Catch a request for filetype help
    if filetype is not None and filetype.lower() == "help":
        # Output possible file types
        print("Allowed input types: %s" % ", ".join(allowed_types))
        sys.exit(0)

    # Check that the filetype is valid and get the input type class if it is.
    # The unknown-type check must come before we ask for the type's name, 
    #  so that an unrecognised filetype gives the intended error message 
    #  instead of failing inside the name lookup
    input_type = get_input_type(filetype)
    if input_type is None:
        raise InputTypeError("Unknown filetype '%s'. Allowed types are: %s" % (filetype, ", ".join(allowed_types)))
    type_name = input_type_name(input_type)
    if type_name not in allowed_types:
        raise InputTypeError("Cannot accept input of type '%s'. Allowed " "types are: %s" % (
            filetype,
            ", ".join(allowed_types),
        ))

    if options is not None and options.lower() == "help":
        # Output help text
        from jazzparser.utils.options import options_help_text

        print(options_help_text(input_type.FILE_INPUT_OPTIONS, intro="Available options for input type %s" % type_name))
        sys.exit(0)

    if filename is None:
        return None

    # First get a dict of the options
    file_options = ModuleOption.process_option_string(options)
    # Process the options as appropriate for this type
    file_options = input_type.process_option_dict(file_options)

    # Instantiate the input from the file as appropriate for the input type
    input_data = input_type.from_file(filename, file_options)
    return input_data
예제 #13
0
def main():
    set_proc_title("jazzparser")
    ########################################################
    usage = "jazzparser [<options>]"
    description = "The main parser interface for the Jazz Parser"
    ## Process the input options
    optparser = OptionParser(usage=usage, description=description)
    ###
    # File input options
    group = OptionGroup(optparser, "Input", "Input type and location")
    optparser.add_option_group(group)
    group.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    group.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    group.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    group.add_option("--index", "--indices", dest="input_index", action="store", help="select individual inputs to process. Specify as a comma-separated list of indices. All inputs are loaded as usual, but only the ith input is processed, for each i in the list")
    group.add_option("--only-load", dest="only_load", action="store_true", help="don't do anything with the inputs, just load and list them. Handy for checking the inputs load and getting their indices")
    group.add_option("--partitions", dest="partitions", action="store", type="int", help="divide the input data into this number of partitions and use a different set of models for each. For any parser, tagger and backoff that takes a 'model' argument, the partition number will be appended to the given value")
    group.add_option("--seq-parts", "--sequence-partitions", dest="sequence_partitions", action="store", help="use a chord sequence index to partition the inputs. Input type (bulk) must support association of the inputs with chord sequences by id. Sequences in the given sequence index file are partitioned n ways (--partitions) and the inputs are processed according to their associated sequence.")
    group.add_option("--continue", "--skip-done", dest="skip_done", action="store_true", help="skip any inputs for which a readable results file already exists. This is useful for continuing a bulk job that was stopped in the middle")
    ###
    group = OptionGroup(optparser, "Parser", "Parser, supertagger and backoff parser")
    optparser.add_option_group(group)
    group.add_option("-d", "--derivations", dest="derivations", action="store_true", help="keep derivation logs during parse.")
    group.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # Parser options
    group.add_option("-p", "--parser", dest="parser", action="store", help="use the named parser algorithm instead of the default. Use '-p help' to see the list of available parsers. Default: %s" % settings.DEFAULT_PARSER, default=settings.DEFAULT_PARSER)
    group.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser. Type '--popt help', using '--parser <name>' to select a parser module, to get a list of options.")
    # Tagger options
    group.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    group.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Backoff options
    group.add_option("-b", "--backoff", "--noparse", dest="backoff", action="store", help="use the named backoff model as a backoff if the parser produces no results")
    group.add_option("--bopt", "--backoff-options", "--backoff-options", "--npo", dest="backoff_opts", action="append", help="specify options for the  backoff model. Type '--npo help', using '--backoff <name>' to select a backoff modules, to get a list of options.")
    ###
    # Multiprocessing options
    group = OptionGroup(optparser, "Multiprocessing")
    optparser.add_option_group(group)
    group.add_option("--processes", dest="processes", action="store", type="int", help="number of processes to create to perform parses in parallel. Default: 1, i.e. no process pool. Use -1 to create a process for every input", default=1)
    ###
    # Output options
    group = OptionGroup(optparser, "Output")
    optparser.add_option_group(group)
    group.add_option("--output", dest="output", action="store", help="directory name to output parse results to. A filename specific to the individual input will be appended to this")
    group.add_option("--topn", dest="topn", action="store", type="int", help="limit the number of final results to store in the output file to the top n by probability. By default, stores all")
    group.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.")
    group.add_option("-a", "--atomic-results", dest="atoms_only", action="store_true", help="only include atomic categories in the results.")
    group.add_option("-l", "--latex", dest="latex", action="store_true", help="output all results as Latex source. Used to produce a whole Latex document, but doesn't any more")
    group.add_option("--all-times", dest="all_times", action="store_true", help="display all timing information on semantics in output.")
    group.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    group.add_option("--time", dest="time", action="store_true", help="time how long the parse takes and output with the results.")
    group.add_option("--no-results", dest="no_results", action="store_true", help="don't print out the parse results at the end. Obviously you'll want to make sure they're going to a file (--output). This is useful for bulk parse jobs, where the results produce a lot of unnecessary output")
    group.add_option("--no-progress", dest="no_progress", action="store_true", help="don't output the summary of completed sequences after each one finishes")
    ###
    # Output analysis and harmonical
    group = OptionGroup(optparser, "Output processing", "Output analysis and harmonical")
    optparser.add_option_group(group)
    group.add_option("--harmonical", dest="harmonical", action="store", help="use the harmonical to play the chords justly intoned according to the top result and output to a wave file.")
    group.add_option("--enharmonical", dest="enharmonical", action="store", help="use the harmonical to play the chords in equal temperament and output to a wave file.")
    group.add_option("--midi", dest="midi", action="store_true", help="generate MIDI files from the harmonical, instead of wave files.")
    group.add_option("--tempo", dest="tempo", action="store", type=int, help="tempo to use for the generated music (see --harmonical/--enharmonical). Default: 120", default=120)
    group.add_option("--lh-analysis", dest="lh_analysis", action="store_true", help="output the Longuet-Higgins space interpretation of the semantics for each result.")
    group.add_option("--lh-coordinates", dest="lh_coord", action="store_true", help="like lh-analysis, but displays the coordinates of the points instead of their names.")
    ###
    # Logging options
    group = OptionGroup(optparser, "Logging")
    optparser.add_option_group(group)
    group.add_option("--long-progress", dest="long_progress", action="store_true", help="print a summary of the chart so far after each chord/word has been processed.")
    group.add_option("--progress", "--short-progress", dest="short_progress", action="store_true", help="print a small amount of information out during parsing to indicate progress.")
    group.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    ###
    # Shell options
    group = OptionGroup(optparser, "Shell", "Interactive shell for inspecting results and parser state")
    optparser.add_option_group(group)
    group.add_option("-i", "--interactive", dest="interactive", action="store_true", help="enter interactive mode after parsing.")
    group.add_option("--error", dest="error_shell", action="store_true", help="catch any errors, report them and then enter the interactive shell. This also catches keyboard interrupts, so you can use it to halt parsing and enter the shell.")
    
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    
    # Get log level option first, so we can start using the logger
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    # Set up a logger
    init_logging(log_level)
    
    if options.latex:
        settings.OPTIONS.OUTPUT_LATEX = True
    
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Check the grammar actually exists
    grammar_names = get_grammar_names()
    if options.grammar is not None and options.grammar not in grammar_names:
        # This is not a valid grammar name
        logger.error("The grammar '%s' does not exist. Possible "\
            "grammars are: %s." % (options.grammar, ", ".join(grammar_names)))
        return 1
    grammar = get_grammar(options.grammar)
        
    ######## Parser ########
    # Load the requested parser
    from jazzparser.parsers import PARSERS
    if options.parser.lower() == "help":
        print "Available parsers are: %s" % ", ".join(PARSERS)
        return 0
    try:
        parser_cls = get_parser(options.parser)
    except ParserLoadError:
        logger.error("The parser '%s' could not be loaded. Possible "\
            "parsers are: %s" % (options.parser, ", ".join(PARSERS)))
        return 1
        
    # Get parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(parser_cls.PARSER_OPTIONS, intro="Available options for selected parser")
            return 0
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        parser_cls.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        return 1
예제 #14
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="append", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(options.training_opts)
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)
    
    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name,None)]
    
    for part_name,chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
        
        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data, part_name, 
                                     logger=logger, 
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
예제 #15
0
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option(
        "-t",
        "--tagger",
        "--supertagger",
        dest="supertagger",
        action="store",
        help=
        "run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s"
        % settings.DEFAULT_SUPERTAGGER,
        default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option(
        "--topt",
        "--tagger-options",
        dest="topts",
        action="append",
        help=
        "specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options."
    )
    # Commonly-used misc
    optparser.add_option("-g",
                         "--grammar",
                         dest="grammar",
                         action="store",
                         help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option(
        "--file",
        "-f",
        dest="file",
        action="store",
        help=
        "use a file to get parser input from. Use --filetype to specify the type of the file."
    )
    optparser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords",
        default='chords')
    optparser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Misc options
    optparser.add_option("-v",
                         "--debug",
                         dest="debug",
                         action="store_true",
                         help="output verbose debugging information.")
    optparser.add_option(
        "-i",
        "--interactive",
        dest="interactive",
        action="store_true",
        help=
        "instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging"
    )
    # Logging options
    optparser.add_option(
        "--logger",
        dest="logger",
        action="store",
        help=
        "directory to put parser logging in. A filename based on an identifier for each individual input will be appended."
    )
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(
                tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
예제 #16
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options"
    )
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="Output verbose logging information to stderr")
    parser.add_option("-g",
                      "--grammar",
                      dest="grammar",
                      action="store",
                      help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level=log_level, name="training", stderr=True)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel

    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS,
                                intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            PcfgModel.TRAINING_OPTIONS)

    if len(arguments) == 0:
        print >> sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >> sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name

    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i))
                 for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]

    if len(arguments) < 2:
        print >> sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])

    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]

    for dataset, (parti, part_model) in zip(datasets, parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model,
                                dataset,
                                opts,
                                grammar=grammar,
                                logger=logger)
        model.save()
        print "Trained model", part_model
예제 #17
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
예제 #18
0
 else:
     for m in formalism.semantics_distance_metrics:
         if m.name == options.metric:
             metric_cls = m
             break
     else:
         # No metric found matching this name
         print "No metric '%s'" % options.metric
         sys.exit(1)
 print >>sys.stderr, "Using distance metric: %s" % metric_cls.name
 # Now process the metric options
 if options.mopts is not None:
     moptstr = options.mopts
     if "help" in [s.strip().lower() for s in moptstr]:
         # Output this parser's option help
         print options_help_text(metric_cls.OPTIONS, intro="Available options for metric '%s'" % metric_cls.name)
         sys.exit(0)
     moptstr = ":".join(moptstr)
 else:
     moptstr = ""
 mopts = ModuleOption.process_option_string(moptstr)
 # Instantiate the metric with these options
 metric = metric_cls(options=mopts)
 
     
 if len(arguments) < 2:
     print >>sys.stderr, "Specify a song corpus name and one or more files to read results from"
     sys.exit(1)
 
 # First argument is an TonalSpaceAnalysisSet
 corpus_name = arguments[0]
예제 #19
0
        for m in formalism.semantics_distance_metrics:
            if m.name == options.metric:
                metric_cls = m
                break
        else:
            # No metric found matching this name
            print "No metric '%s'" % options.metric
            sys.exit(1)
    print >> sys.stderr, "Using distance metric: %s" % metric_cls.name
    # Now process the metric options
    if options.mopts is not None:
        moptstr = options.mopts
        if "help" in [s.strip().lower() for s in moptstr]:
            # Output this parser's option help
            print options_help_text(metric_cls.OPTIONS,
                                    intro="Available options for metric '%s'" %
                                    metric_cls.name)
            sys.exit(0)
        moptstr = ":".join(moptstr)
    else:
        moptstr = ""
    mopts = ModuleOption.process_option_string(moptstr)
    # Instantiate the metric with these options
    metric = metric_cls(options=mopts)

    if len(arguments) < 2:
        print >> sys.stderr, "Specify a song corpus name and one or more files to read results from"
        sys.exit(1)

    # First argument is an TonalSpaceAnalysisSet
    corpus_name = arguments[0]
예제 #20
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='segmidi')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Labeling options
    parser.add_option(
        "--labeler-options",
        "--lopt",
        dest="labeler_options",
        action="append",
        help=
        "options for the labeler. Type '--lopt help' for a list of available options."
    )
    parser.add_option(
        "--no-key",
        "--nk",
        dest="no_key",
        action="store_true",
        help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option(
        "--single",
        "-1",
        dest="single",
        action="store_true",
        help=
        "show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)"
    )
    parser.add_option(
        '-r',
        '--realize',
        dest="realize",
        action="store",
        help="realize the chord sequence as a midi file, overlaid on the input"
    )
    parser.add_option(
        '--chords-only',
        dest="chords_only",
        action="store_true",
        help=
        "only realize the chords: don't overlay on the input midi (only works with -r)"
    )
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and "help" in options.labeler_options:
        print options_help_text(HPChordLabeler.LABELING_OPTIONS,
                                intro="Options for HP chord labeler")
        sys.exit(0)

    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input "\
            "(MIDI) data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True

    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    bulk = not is_bulk_type(type(input_data))
    if bulk:
        input_data = [input_data]

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print "Read midi data in %d segments" % len(data)

        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger
        labs = model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print ", ".join(["%s" % timelabs[0][0] for timelabs in labels])
        else:
            # Print out the labels for each timestep
            for time, timelabs in enumerate(labels):
                print "%d: %s" % (time, ", ".join([
                    "%s (%.2e)" % (label, prob) for (label, prob) in timelabs
                ]))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print "Realizing output chord sequence"
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream,
                                       offset=data.tick_offset)

            if bulk:
                filename = "%s-%d" % (options.realize, i)
            else:
                filename = options.realize
            write_midifile(stream, filename)
예제 #21
0
def main():
    usage = "%prog [options] <song-set> <results-file0> [<results-file1> ...]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "--popt",
        "--parser-options",
        dest="popts",
        action="append",
        help=
        "specify options for the parser that interprets the gold standard annotations. Type '--popt help' to get a list of options (we use a DirectedCkyParser)"
    )
    parser.add_option(
        "-m",
        "--metric",
        dest="metric",
        action="store",
        help=
        "semantics distance metric to use. Use '-m help' for a list of available metrics"
    )
    parser.add_option(
        "--mopt",
        "--metric-options",
        dest="mopts",
        action="append",
        help=
        "options to pass to the semantics metric. Use with '--mopt help' with -m to see available options"
    )
    parser.add_option(
        "-r",
        "--print-results",
        dest="print_results",
        action="store",
        default=5,
        type="int",
        help=
        "number of top search results to print for each query (parse result). Default: 5. Use -1 to print distances from all songs in the corpus"
    )
    parser.add_option(
        "-g",
        "--gold-only",
        dest="gold_only",
        action="store_true",
        help=
        "skip results that have no gold standard sequence associated with them (we can't tell which is the right answer for these)"
    )
    parser.add_option(
        "--mc",
        "--metric-computation",
        dest="metric_computation",
        action="store_true",
        help=
        "output the computation information for the metric between the parse result and each top search result"
    )
    options, arguments = parser.parse_args()

    # For now, we always use the music_halfspan formalism with this script
    # If we wanted to make it generic, we'd just load the formalism according
    #  to a command-line option
    formalism = Formalism

    # Process parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this parser's option help
            print options_help_text(
                DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for gold standard interpreter")
            sys.exit(0)
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        DirectedCkyParser.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        sys.exit(1)
예제 #22
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--popt",
        "--parser-options",
        dest="popts",
        action="append",
        help=
        "specify options for the parser. Type '--popt help' to get a list of options (we use a DirectedCkyParser)"
    )
    parser.add_option("--derivations",
                      "--deriv",
                      dest="derivations",
                      action="store_true",
                      help="print out derivation traces of all the results")
    parser.add_option("--index",
                      "-i",
                      dest="index",
                      action="store",
                      type="int",
                      help="parse just the sequence with this index")
    parser.add_option("--quiet",
                      "-q",
                      dest="quiet",
                      action="store_true",
                      help="show only errors in the output")
    parser.add_option(
        "--tonal-space",
        "--ts",
        dest="tonal_space",
        action="store_true",
        help="show the tonal space path (with -q, shows only paths)")
    parser.add_option(
        "--output-set",
        "-o",
        dest="output_set",
        action="store",
        help="store the analyses to a tonal space analysis set with this name")
    parser.add_option(
        "--trace-parse",
        "-t",
        dest="trace_parse",
        action="store_true",
        help=
        "output a trace of the shift-reduce parser's operations in producing the full interpretation from the annotations"
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            print options_help_text(
                DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for the directed parser")
            return 0
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)

    grammar = get_grammar()
    if options.quiet:
        logger = create_plain_stderr_logger(log_level=logging.ERROR)
    else:
        logger = create_plain_stderr_logger()

    if options.trace_parse:
        parse_logger = logger
    else:
        parse_logger = None

    seq_index = SequenceIndex.from_file(arguments[0])
    # Get the chord sequence(s)
    if options.index is None:
        seqs = seq_index.sequences
    else:
        seqs = [seq_index.sequence_by_index(options.index)]
    logger.info("%d sequences\n" % len(seqs))

    full_analyses = []
    stats = {
        'full': 0,
        'partial': 0,
        'fail': 0,
    }
    # Try parsing every sequence
    for seq in seqs:
        logger.info("====== Sequence %s =======" % seq.string_name)
        try:
            results = parse_sequence_with_annotations(
                seq, grammar, logger=logger, parse_logger=parse_logger)
        except ParseError, err:
            logger.error("Error parsing: %s" % err)
            stats['fail'] += 1
        else:
            # This may have resulted in multiple partial parses
            logger.info("%d partial parses" % len(results))

            if len(results) == 1:
                stats['full'] += 1
            else:
                stats['partial'] += 1

            if options.derivations:
                # Output the derivation trace for each partial parse
                for result in results:
                    print
                    print result.derivation_trace

            if options.tonal_space:
                # Output the tonal space coordinates
                path = grammar.formalism.sign_to_coordinates(results[0])
                for i, point in enumerate(path):
                    print "%d, %d: %s" % (seq.id, i, point)

            # Only include a result in the output analyses if it was a full parse
            if len(results) == 1:
                full_analyses.append((seq.string_name, results[0].semantics))
            else:
                logger.warn("%s was not included in the output analyses, "\
                    "since it was not fully parsed" % seq.string_name)
예제 #23
0
파일: tag.py 프로젝트: johndpope/jazzparser
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)
    
    # Tagger options
    optparser.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Commonly-used misc
    optparser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    optparser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    optparser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Misc options
    optparser.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    optparser.add_option("-i", "--interactive", dest="interactive", action="store_true", help="instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging")
    # Logging options
    optparser.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)
    
    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)
        
    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1
        
    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
예제 #24
0
def main():
    usage = "%prog [options] <model_name> <input-file>"
    description = (
        "Trains a model for the RaphSto chord labelling "
        "algorithm on a file that contains a list of midi files with "
        "training options"
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p",
        "--partitions",
        dest="partitions",
        action="store",
        type="int",
        help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.",
    )
    parser.add_option(
        "--opts",
        dest="opts",
        action="store",
        help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.",
    )
    parser.add_option(
        "--proc",
        "--processes",
        dest="processes",
        action="store",
        type="int",
        help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)",
        default=1,
    )
    parser.add_option(
        "--max-length",
        dest="max_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks",
    )
    parser.add_option(
        "--split-length",
        dest="split_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot",
    )
    parser.add_option(
        "--min-length",
        dest="min_length",
        action="store",
        type="int",
        help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence",
    )
    parser.add_option(
        "--progress-out",
        dest="progress_out",
        action="store",
        help="output logging info to a file instead of the command line",
    )
    parser.add_option(
        "--init-model",
        dest="init_model",
        action="store",
        help="initialize the model using parameters from an already trained model",
    )
    parser.add_option(
        "--init-ctrans",
        dest="init_ctrans",
        action="store",
        help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability",
    )
    parser.add_option(
        "--chord-set",
        dest="chord_set",
        action="store",
        help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used",
    )
    parser.add_option(
        "-m",
        "--model-type",
        dest="model_type",
        action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()),
        default="standard",
    )
    options, arguments = parse_args_with_config(parser)

    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    print >> sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(" ")
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >> sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >> sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >> sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)

    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i, mh in enumerate(handlers):
        logger.info("%s: %s" % (i, midis.inputs[i][0]))
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % (i, options.max_length, len(emissions)))
            emissions = emissions[: options.max_length]
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer " "than %d chunks" % (i, options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[: options.split_length])
                emissions = emissions[options.split_length :]
        training_data.append(emissions)

    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info(
                "Threw away %d short sequences (below %d chunks)"
                % ((before_chuck - len(training_data)), options.min_length)
            )

    logger.info(
        "Training on %d sequences. Lengths: %s"
        % (len(training_data), ", ".join(str(len(seq)) for seq in training_data))
    )

    if options.partitions is not None:
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)]
    else:
        models = [(model_name, training_data)]

    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes

    for part_name, data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: " "%s, %s, %s" % ctype_params)
            init_kwargs = {"model_name": part_name}
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs["chord_set"] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)

            # Initialize the chord transition probabilities if given
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s" % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >> sys.stderr, "Training terminating at %s" % datetime.now().isoformat(" ")
예제 #25
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
예제 #26
0
def command_line_input(filename=None, filetype=None, options="",
                       allowed_types=None, default_type=None):
    """
    Utility function for processing file input options from the command line.
    Pass in as args the values straight from the command line options to 
    select a filename, filetype and list of options.
    
    Typical command-line options for this purpose (for an optparse option parser C{op})::
     op.add_option("--file", "-f", dest="file", action="store", help="use a file to get input from")
     op.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Use '--filetype help' for a list of available types")
     op.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Use '--fopt help', with '--ft <type>', for a list of available options")
    Then you can call this function as::
     command_line_input(filename=options.file, filetype=options.filetype, options=options.file_options)
    
    If the filetype or the options string is "help" (case-insensitive), the 
    corresponding help text is printed and the process exits with status 0.
    
    @type filename: str
    @param filename: path of the input file, or None if no file was given
    @type filetype: str
    @param filetype: name of the input type; falls back to C{default_type} 
        when None
    @type options: str
    @param options: option string for the input type, or None
    @type allowed_types: list of strs
    @param allowed_types: types of input you want the user to be able to give.
        If not given, all types are allowed
    @type default_type: str
    @param default_type: filetype to assume if no other filetype is given
    @rtype: L{InputReader} subclass
    @return: the input wrapper of appropriate type, or None if no input file 
        was given
    @raise InputTypeError: if the filetype is unknown or is not one of the 
        allowed types
    
    """
    if allowed_types is None:
        allowed_types = get_input_type_names()

    if filetype is None and default_type is not None:
        filetype = default_type

    # Catch a request for filetype help
    if filetype is not None and filetype.lower() == "help":
        # Output possible file types
        print("Allowed input types: %s" % ", ".join(allowed_types))
        sys.exit(0)

    # Check that the filetype is valid and get the input type class if it is.
    # The None check comes before the name lookup so that an unknown type
    # is always reported as an InputTypeError and the lookup helper is never
    # handed None.
    input_type = get_input_type(filetype)
    if input_type is None:
        raise InputTypeError(
            "Unknown filetype '%s'. Allowed types are: %s" %
            (filetype, ", ".join(allowed_types)))
    type_name = input_type_name(input_type)
    if type_name not in allowed_types:
        raise InputTypeError(
            "Cannot accept input of type '%s'. Allowed "
            "types are: %s" % (filetype, ", ".join(allowed_types)))

    if options is not None and options.lower() == "help":
        # Output help text for this input type's options
        from jazzparser.utils.options import options_help_text
        print(options_help_text(input_type.FILE_INPUT_OPTIONS,
                                intro="Available options for input type %s" %
                                type_name))
        sys.exit(0)

    # The help requests above are honoured even when no file is given
    if filename is None:
        return None

    # First get a dict of the options
    file_options = ModuleOption.process_option_string(options)
    # Process the options as appropriate for this type
    file_options = input_type.process_option_dict(file_options)

    # Instantiate the input from the file as appropriate for the input type
    return input_type.from_file(filename, file_options)
예제 #27
0
    def run(self, args, state):
        """
        Rank the songs of the loaded songset by their distance from one or 
        more results and print them, closest first.
        
        Reads the C{metric}, C{mopts} and C{average} entries of 
        C{self.options}. If the metric name or the metric options are 
        "help", the relevant help is printed and nothing else is done.
        
        @param args: command arguments; if non-empty, the first is the 
            number of the (first) result to compare. Defaults to result 0
        @param state: shell state. C{state.results} supplies the results 
            and the "songset" data item supplies the songs
        @raise ShellError: if the result number is not an integer, if a 
            requested result does not exist, or if no metric has the 
            requested name
        
        """
        # NOTE(review): the two names imported from the evaluation module are
        # not used in this method; the import is kept in case it has side
        # effects - confirm before removing
        from jazzparser.formalisms.music_halfspan.evaluation import \
                        tonal_space_local_alignment, tonal_space_distance
        from jazzparser.formalisms.music_halfspan import Formalism

        metric_name = self.options['metric']
        if metric_name == "help":
            # Print a list of available metrics
            print(", ".join([
                metric.name for metric in Formalism.semantics_distance_metrics
            ]))
            return

        if len(args) == 0:
            first_result = 0
        else:
            try:
                first_result = int(args[0])
            except ValueError:
                # Report bad input the same way as other user errors here
                raise ShellError("Result number must be an integer, not "
                                 "'%s'" % args[0])

        average = self.options['average']
        if average and average > 1:
            # Average the distance over several consecutive results
            resnums = range(first_result, first_result + average)
        else:
            # Just a single result
            resnums = [first_result]

        # Get the semantics of each result that we're going to try to match
        resultsems = []
        for resnum in resnums:
            if resnum >= len(state.results):
                raise ShellError("No result number %d" % resnum)
            resultsems.append(state.results[resnum].semantics)

        # Get the loaded songset containing the song corpus
        songset = state.get_data(
            "songset", help_msg="Use command 'loadsongs' to load a songset")

        # Load the appropriate metric
        if metric_name is None:
            # Use the first in the list as default
            metric_cls = Formalism.semantics_distance_metrics[0]
        else:
            for candidate in Formalism.semantics_distance_metrics:
                if candidate.name == metric_name:
                    metric_cls = candidate
                    break
            else:
                # No metric found matching this name. Raise a shell error
                # rather than exiting: sys.exit() here would terminate the
                # whole interactive session over a mistyped option.
                raise ShellError("No metric '%s'" % metric_name)
        print("Using distance metric: %s\n" % metric_cls.name)

        # Now process the metric options
        moptstr = self.options['mopts']
        if moptstr == "help":
            # Output this metric's option help
            print(options_help_text(
                metric_cls.OPTIONS,
                intro="Available options for metric '%s'" %
                metric_cls.name))
            return
        if moptstr is None:
            moptstr = ""
        mopts = ModuleOption.process_option_string(moptstr)
        # Instantiate the metric with these options
        metric = metric_cls(options=mopts)

        # Collect, for each song, its distance from every selected result
        song_distances = {}
        for resultsem in resultsems:
            for name, song in songset.analyses:
                distance = metric.distance(resultsem, song)
                song_distances.setdefault(name, []).append(distance)

        # Average the scores over the results
        distances = [(sum(costs) / float(len(costs)), name)
                     for name, costs in song_distances.items()]

        # Sort so the closest ones come first (by distance only)
        distances.sort(key=lambda pair: pair[0])

        # Output all the songs, ordered by similarity, with their distance
        for i, (distance, name) in enumerate(distances):
            print("%d> %s  (%s)" % (i, name, distance))
예제 #28
0
def main():
    """
    Command-line entry point: train a supertagger model, or one model per 
    data partition, from a bulk input file and save each trained model.
    
    Positional arguments are the model type, the model name and the input 
    file. Only the model type is needed for C{--opts help}. Exits with 
    status 1 if arguments are missing or the model type cannot be trained.
    
    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    # NOTE(review): this instance is not used below; presumably it is 
    # constructed for its loading side effects - confirm before removing
    grammar = Grammar()
    
    missing_args_msg = "You must specify a model type, a model name and an input data file as arguments"
    
    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write("%s\n" % missing_args_msg)
        # Stop here: without this, arguments[0] below raises IndexError
        sys.exit(1)
    model_type = arguments[0]
    
    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write("%s\n" % (
            "'%s' is not a valid model type. Available taggers are: %s" %
            (model_type, ", ".join(TRAINABLE_MODELS))))
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write("%s\n" % (
            "'%s' isn't a registered model type. Check that "
            "the name in TRAINABLE_MODELS is correct" % model_type))
        sys.exit(1)
    
    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write("%s\n" % (
            "'%s' tagger cannot be trained with this script. Only model taggers can be." % (tagger_cls.__name__)))
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print(options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts), 
                            model_cls.TRAINING_OPTIONS)
    
    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write("%s\n" % missing_args_msg)
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    if options.partitions is not None and options.partitions > 1:
        # One model per partition, each named after its partition number.
        # Element [1] of the get_partitions() result supplies the data for
        # each of these models.
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]
    
    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None
            
        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
예제 #29
0
def command_line_metric(formalism, metric_name=None, options=""):
    """
    Utility function to make it easy to load a metric, with user-specified 
    options, from the command line. Takes care of printing help output.
    
    Typical options::
      parser.add_option("-m", "--metric", dest="metric", action="store", 
          help="semantics distance metric to use. Use '-m help' for a list of available metrics")
      parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", 
          help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    
    You could then call this as::
      metric = command_line_metric(formalism, options.metric, options.mopts)
    
    The process exits with status 0 after printing help (metric name "help", 
    or "help" among the options) and with status 1 if the formalism has no 
    metrics or no metric has the requested name.
    
    @param formalism: object whose C{semantics_distance_metrics} lists the 
        available metric classes
    @type metric_name: str
    @param metric_name: name of the metric to use. If None, the first 
        metric in the formalism's list is used
    @type options: str, list of strs or None
    @param options: metric option string, or a list of them (as produced 
        by an "append" option action)
    @return: the metric instantiated with given options
    
    """
    import sys
    
    metrics = formalism.semantics_distance_metrics
    
    # Get a distance metric
    # Just check this, as it'll cause problems
    if len(metrics) == 0:
        print("ERROR: the formalism defines no distance metrics, so this "
              "script won't work")
        sys.exit(1)
    
    # First get the metric
    if metric_name == "help":
        # Print out a list of metrics available
        print("Available distance metrics:")
        print(", ".join([metric.name for metric in metrics]))
        sys.exit(0)
    
    if metric_name is None:
        # Use the first in the list as default
        metric_cls = metrics[0]
    else:
        # Look for the named metric
        for candidate in metrics:
            if candidate.name == metric_name:
                metric_cls = candidate
                break
        else:
            # No metric found matching this name
            print("No metric '%s'" % metric_name)
            sys.exit(1)
    
    # Imported only once a metric has been selected: the early-exit paths
    # above don't need the option-processing utilities
    from jazzparser.utils.options import ModuleOption, options_help_text
    
    # Options might be given as a list, if the option action was "append".
    # Treat every kind of string as a single option string: testing only
    # for str would let a Python 2 unicode string through to be iterated
    # and joined character by character below.
    try:
        string_types = basestring
    except NameError:
        # No basestring on Python 3, where str is the only text type
        string_types = str
    if isinstance(options, string_types):
        options = [options]
    
    # Now process the metric options
    if options is not None:
        if "help" in [s.strip().lower() for s in options]:
            # Output this metric's option help
            print(options_help_text(metric_cls.OPTIONS, 
                intro="Available options for metric '%s'" % metric_cls.name))
            sys.exit(0)
        # Combine the separate option strings into a single one
        moptstr = ":".join(options)
    else:
        moptstr = ""
    mopts = ModuleOption.process_option_string(moptstr)
    # Instantiate the metric with these options
    return metric_cls(options=mopts)