Example #1
0
def main():
    """List the names of the trained models of one model type.

    The model class is picked with ``-t/--type``; when the option is not
    given, ``RaphstoHmm`` is used.  If the given type is not a key of
    ``MODEL_TYPES`` the available type names are printed and the script
    exits with status 0.  Passing ``help`` (any case) prints just that list;
    any other unknown value is first reported as unknown.

    The model names returned by ``cls.list_models()`` are printed one per
    line; nothing is printed when there are none.
    """
    usage = "%prog [options] <model-name>"
    description = "Outputs a list of trained models"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-t', '--type', dest="type", action="store", help="model type ('help' to list model types)")
    options, arguments = parser.parse_args()

    if options.type is None:
        cls = RaphstoHmm
    elif options.type in MODEL_TYPES:
        cls = MODEL_TYPES[options.type]
    else:
        if options.type.lower() != "help":
            # Name the rejected type: the message used to be printed with a
            # literal, uninterpolated '%s'.
            print("No model type '%s'. Available model types:" % options.type)
        print("\n".join(MODEL_TYPES.keys()))
        sys.exit(0)

    models = cls.list_models()
    if models:
        print("\n".join(models))
Example #2
0
def main():
    """Print the trained models available for a model type.

    ``-t/--type`` selects the class from ``MODEL_TYPES``; without it the
    ``RaphstoHmm`` class is used.  A value that is not in ``MODEL_TYPES``
    makes the script print the known type names and exit with status 0;
    unless that value is ``help`` (case-insensitive) a line naming the
    unknown type is printed first.

    Each name returned by ``cls.list_models()`` is printed on its own line.
    """
    usage = "%prog [options] <model-name>"
    description = "Outputs a list of trained models"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-t',
                      '--type',
                      dest="type",
                      action="store",
                      help="model type ('help' to list model types)")
    options, arguments = parser.parse_args()

    requested = options.type
    if requested is not None and requested not in MODEL_TYPES:
        if requested.lower() != "help":
            # The requested name is now interpolated; previously the
            # format string was printed as-is, showing a bare '%s'.
            print("No model type '%s'. Available model types:" % requested)
        print("\n".join(MODEL_TYPES.keys()))
        sys.exit(0)

    if requested is None:
        cls = RaphstoHmm
    else:
        cls = MODEL_TYPES[requested]

    models = cls.list_models()
    if models:
        print("\n".join(models))
Example #3
0
def main():
    """Print a Latex summary of a trained raphsto model's parameters.

    Command line: ``[options] <model-name>``.  The model class is looked up
    in ``MODEL_TYPES`` by ``-m/--model-type`` and the named model is loaded
    with ``load_model``.  The emission distribution is always printed; the
    key transition, chord transition and key-change chord distributions are
    skipped for the ``unigram`` model type.  ``--head`` and ``--subhead``
    give the Latex commands wrapped around section titles.

    Exits with status 1 (message on stderr) when the model name is missing
    or the model type is unknown.
    """
    parser = OptionParser(
        usage="%prog [options] <model-name>",
        description="Outputs a Latex summary of a raphsto model's parameters")
    parser.add_option(
        '-m', '--model-type', dest="model_type", action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(MODEL_TYPES.keys()),
        default="standard")
    parser.add_option(
        '--head', dest="head", action="store",
        help="Latex command to use for headings", default="\\subsection")
    parser.add_option(
        '--subhead', dest="subhead", action="store",
        help="Latex command to use for subheadings", default="\\subsubsection*")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write("You must specify a model name as the first argument\n")
        sys.exit(1)
    model_name = arguments[0]

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write(("Model type must be one of: %s" % ", ".join(MODEL_TYPES)) + "\n")
        sys.exit(1)

    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    def heading(title):
        return "%s{%s}" % (options.head, title)

    def subheading(title):
        return "\n%s{%s}\n" % (options.subhead, title)

    def ranked(dist):
        # (probability, sample) pairs, largest first
        pairs = [(dist.prob(outcome), outcome) for outcome in dist.samples()]
        pairs.sort()
        pairs.reverse()
        return pairs

    # --- Emission distribution (shown for every model type) ---
    print(heading("Emission Distribution"))
    note_roles = {
        0: 'chord root',
        1: 'chord 3rd',
        2: 'chord 5th',
        3: 'other scale note',
        4: 'non-scale note',
    }
    beat_labels = {
        0: '1st beat',
        1: '3rd beat',
        2: '2nd or 4th beat',
        3: 'off beat',
    }
    for beat_cat in model.emission_dist.conditions():
        print(subheading("Beat category: %s (%s)" % (beat_cat, beat_labels.get(beat_cat, '?'))))
        print("")
        print("\\begin{tabular}{l l l}")
        emissions = model.emission_dist[beat_cat]
        for degree in emissions.samples():
            print("D = %s & %.5f & (%s)\\\\" % (degree,
                                              emissions.prob(degree),
                                              note_roles.get(degree, '?')))
        print("\\end{tabular}")
        print("")

    # Only the emission distribution is printed for the unigram model type
    if options.model_type == "unigram":
        return

    # --- Key transitions ---
    print("")
    print(heading("Key Transition Distribution"))
    for prev_mode in model.key_transition_dist.conditions():
        print(subheading("Previous mode: %s" % constants.MODE_NAMES[prev_mode]))
        print("\\begin{tabular}{l l l}")
        print("\\textbf{Key} & \\textbf{Mode} \\\\")
        for weight, key_mode in ranked(model.key_transition_dist[prev_mode]):
            print("%s & %s & %.5f \\\\" % (
                constants.RELATIVE_TONIC_NAMES.get(key_mode[0], '?'),
                constants.MODE_NAMES[key_mode[1]],
                weight))
        print("\\end{tabular}")

    # --- Chord transitions ---
    print("")
    print(heading("Chord Transition Distribution"))
    for prev_chord in model.chord_transition_dist.conditions():
        print("")
        print(subheading("Previous chord: %s" % constants.CHORD_NAMES[prev_chord]))
        print("")
        print("\\begin{tabular}{l l}")
        for weight, chord in ranked(model.chord_transition_dist[prev_chord]):
            print("%s & %.5f \\\\" % (constants.CHORD_NAMES[chord], weight))
        print("\\end{tabular}")

    # --- Chord distribution used on key change ---
    print("")
    print(heading("Key Change Chord Distribution"))
    chord_rows = ranked(model.chord_dist)
    print("\\begin{tabular}{l l}")
    for weight, chord in chord_rows:
        print("%s & %.5f \\\\" % (constants.CHORD_NAMES[chord], weight))
    print("\\end{tabular}")
def main():
    """Train one or more Raphsto chord-labelling models from the command line.

    Command line: ``[options] <model_name> <input-file>``.  The input file is
    read through ``InputSourceFile`` and each handler's emission stream
    becomes a training sequence, optionally truncated (``--max-length``),
    split into pieces (``--split-length``) and filtered by a minimum length
    (``--min-length``).  A model of the class selected by ``-m/--model-type``
    is then initialized -- either from an existing model (``--init-model``)
    or from fixed naive chord-type parameters -- and trained with the trainer
    returned by ``model_cls.get_trainer()``.  With ``--partitions`` one model
    named ``<model_name><n>`` is trained per entry returned by
    ``holdout_partition``.

    Progress is logged to stdout, or to the file given by ``--progress-out``.
    The script exits with status 1 on missing arguments, an unknown model
    type or an unknown chord set, and with status 0 after printing help for
    ``--opts help`` or ``--chord-set help``.
    """
    usage = "%prog [options] <model_name> <input-file>"
    description = (
        "Trains a model for the RaphSto chord labelling "
        "algorithm on a file that contains a list of midi files with "
        "training options"
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p",
        "--partitions",
        dest="partitions",
        action="store",
        type="int",
        help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.",
    )
    parser.add_option(
        "--opts",
        dest="opts",
        action="store",
        help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.",
    )
    parser.add_option(
        "--proc",
        "--processes",
        dest="processes",
        action="store",
        type="int",
        help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)",
        default=1,
    )
    parser.add_option(
        "--max-length",
        dest="max_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks",
    )
    parser.add_option(
        "--split-length",
        dest="split_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot",
    )
    parser.add_option(
        "--min-length",
        dest="min_length",
        action="store",
        type="int",
        help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence",
    )
    parser.add_option(
        "--progress-out",
        dest="progress_out",
        action="store",
        help="output logging info to a file instead of the command line",
    )
    parser.add_option(
        "--init-model",
        dest="init_model",
        action="store",
        help="initialize the model using parameters from an already trained model",
    )
    parser.add_option(
        "--init-ctrans",
        dest="init_ctrans",
        action="store",
        help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability",
    )
    parser.add_option(
        "--chord-set",
        dest="chord_set",
        action="store",
        help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used",
    )
    parser.add_option(
        "-m",
        "--model-type",
        dest="model_type",
        action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()),
        default="standard",
    )
    options, arguments = parse_args_with_config(parser)

    # '--opts help' is handled before the positional arguments are checked,
    # so it works without a model name or input file
    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    print >> sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(" ")
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >> sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >> sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    # '--chord-set help' lists the known chord sets and exits; any other
    # value must be a key of constants.CHORD_SETS
    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >> sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)

    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i, mh in enumerate(handlers):
        logger.info("%s: %s" % (i, midis.inputs[i][0]))
        # Only the first element of get_emission_stream()'s result is used
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % (i, options.max_length, len(emissions)))
            emissions = emissions[: options.max_length]
        if options.split_length is not None:
            # This message is logged for every sequence when --split-length
            # is given, whether or not the sequence is long enough to split
            logger.info("Splitting sequence %d into sequence no longer " "than %d chunks" % (i, options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[: options.split_length])
                emissions = emissions[options.split_length :]
        # Whatever remains (the whole sequence if nothing was split off) is
        # added as a sequence of its own
        training_data.append(emissions)

    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info(
                "Threw away %d short sequences (below %d chunks)"
                % ((before_chuck - len(training_data)), options.min_length)
            )

    logger.info(
        "Training on %d sequences. Lengths: %s"
        % (len(training_data), ", ".join(str(len(seq)) for seq in training_data))
    )

    # Build the list of (model name, training sequences) pairs to train
    if options.partitions is not None:
        # NOTE(review): the --partitions help text says each model is trained
        # on the complement of its partition; whether holdout_partition
        # returns those complements cannot be confirmed from this function
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)]
    else:
        models = [(model_name, training_data)]

    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        # This counts all training sequences, also when training partitions
        processes = len(training_data)
    else:
        processes = options.processes

    for part_name, data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: " "%s, %s, %s" % ctype_params)
            init_kwargs = {"model_name": part_name}
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs["chord_set"] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)

            # Initialize the chord transition probabilities if given
            # (only reached when --init-model was not used)
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s" % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >> sys.stderr, "Training terminating at %s" % datetime.now().isoformat(" ")
def main():
    """Label a midi file with a trained Raphsto model and print the result.

    Command line: ``[options] <model-name> <midi-file>``.  The model class is
    taken from ``MODEL_TYPES`` (``-m/--model-type``), the model is loaded by
    name and ``model.label`` is run on a ``MidiHandler`` built from the midi
    file.  One line per state change is printed; ``-c``, ``-a`` and
    ``--raphsto`` choose the line format (checked in that order), otherwise
    both the full analysis and the chord are shown.  With ``-r/--realize``
    the labelled sequence is also written out as a midi file.

    Exits with status 1 (message on stderr) when fewer than two arguments
    are given or the model type is unknown.
    """
    parser = OptionParser(
        usage="%prog [options] <model-name> <midi-file>",
        description="Assigns harmonic labels to a midi file using a trained Raphsto model",
    )
    parser.add_option(
        "-t", "--time-unit", dest="time_unit", action="store", type="float",
        help="number of beats to take as the basic unit (chunk size) for labelling",
        default=2,
    )
    parser.add_option(
        "-o", "--tick-offset", dest="tick_offset", action="store", type="int",
        help="time in midi ticks at which to start labelling",
        default=0,
    )
    parser.add_option(
        "-d", "--remove-drums", dest="remove_drums", action="store_true",
        help="ignores any channel 10 events in the midi file",
        default=False,
    )
    parser.add_option(
        "-c", "--chords", dest="chords", action="store_true",
        help="displays just chord roots instead of full analysis (default: both)",
    )
    parser.add_option(
        "-a", "--analysis", dest="analysis", action="store_true",
        help="displays a full analysis without reducing to chord roots (default: both)",
    )
    parser.add_option(
        "-r", "--realize", dest="realize", action="store",
        help="realize the chord sequence as a midi file (very basic and horrible realization)",
    )
    parser.add_option(
        "--rands", "--raphsto", dest="raphsto", action="store_true",
        help="displays analysis in the style of the annotations added to MIDI files by the original implementation",
    )
    parser.add_option(
        "--lyrics", dest="lyrics", action="store_true",
        help="include the chord labels as lyric events in the midi file",
        default=False,
    )
    parser.add_option(
        "-m", "--model-type", dest="model_type", action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(MODEL_TYPES.keys()),
        default="standard",
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input midi file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write(("Model type must be one of: %s" % ", ".join(MODEL_TYPES)) + "\n")
        sys.exit(1)

    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    mid = read_midifile(filename)
    # Number of midi ticks in one labelling unit
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(
        mid,
        time_unit=options.time_unit,
        tick_offset=options.tick_offset,
        remove_drums=options.remove_drums,
    )
    # Decode using the model to get a list of (state, time) pairs
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Pick the per-line formatter once, then render every state change
    if options.chords:
        def render(state, tick):
            return "%s (bar %d)" % (format_state_as_chord(state), tick / bar)
    elif options.analysis:
        def render(state, tick):
            return "%s (bar %d)" % (format_state(state), tick / bar)
    elif options.raphsto:
        def render(state, tick):
            return format_state_as_raphsto(state, (tick / bar))
    else:
        def render(state, tick):
            return "%s%s(bar %d)" % (
                format_state(state).ljust(15),
                format_state_as_chord(state).ljust(7),
                tick / bar,
            )
    print("\n".join(render(state, tick) for state, tick in state_changes))

    if options.realize is None:
        return
    # Realize as a midi file
    realizer = ChordSequenceRealizer(
        states,
        resolution=mid.resolution,
        times=times,
        chord_length=options.time_unit,
        text_events=options.lyrics,
    )
    write_midifile(realizer.generate(overlay=mid, offset=options.tick_offset), options.realize)
Example #6
0
def main():
    """Run a trained Raphsto model over a midi file and print its labels.

    Expects ``<model-name> <midi-file>`` on the command line.  The model
    type (``-m``) must be a key of ``MODEL_TYPES``; the model is loaded with
    ``load_model`` and applied through ``model.label`` to a ``MidiHandler``
    for the file.  Each state change is printed on one line in the format
    selected by ``--chords``, ``--analysis`` or ``--raphsto`` (tested in that
    order; the default shows analysis and chord together).  When
    ``--realize`` names a file, the result is also written there as midi.

    Exits with status 1 after a message on stderr if an argument is missing
    or the model type is not known.
    """
    parser = OptionParser(
        usage="%prog [options] <model-name> <midi-file>",
        description="Assigns harmonic labels to a midi file using a trained Raphsto model")

    # (flags, settings) for every option, in the order they are registered
    option_table = [
        (('-t', '--time-unit'), dict(
            dest="time_unit", action="store", type="float",
            help="number of beats to take as the basic unit (chunk size) for labelling",
            default=2)),
        (('-o', '--tick-offset'), dict(
            dest="tick_offset", action="store", type="int",
            help="time in midi ticks at which to start labelling",
            default=0)),
        (('-d', '--remove-drums'), dict(
            dest="remove_drums", action="store_true",
            help="ignores any channel 10 events in the midi file",
            default=False)),
        (('-c', '--chords'), dict(
            dest="chords", action="store_true",
            help="displays just chord roots instead of full analysis (default: both)")),
        (('-a', '--analysis'), dict(
            dest="analysis", action="store_true",
            help="displays a full analysis without reducing to chord roots (default: both)")),
        (('-r', '--realize'), dict(
            dest="realize", action="store",
            help="realize the chord sequence as a midi file (very basic and horrible realization)")),
        (('--rands', '--raphsto'), dict(
            dest="raphsto", action="store_true",
            help="displays analysis in the style of the annotations added to MIDI files by the original implementation")),
        (('--lyrics',), dict(
            dest="lyrics", action="store_true",
            help="include the chord labels as lyric events in the midi file",
            default=False)),
        (('-m', '--model-type'), dict(
            dest="model_type", action="store",
            help="select a model type: one of %s (default: standard)" % ", ".join(MODEL_TYPES.keys()),
            default="standard")),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input midi file as arguments\n")
        sys.exit(1)
    model_name = arguments[0]
    filename = os.path.abspath(arguments[1])

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write(("Model type must be one of: %s" % ", ".join(MODEL_TYPES)) + "\n")
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    # Load the model
    model = model_cls.load_model(model_name)

    mid = read_midifile(filename)
    # Midi ticks per labelling unit
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(mid,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset,
                          remove_drums=options.remove_drums)
    # Decode using the model to get a list of states
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Build one output line per state change in the requested format
    report = []
    for st, time in state_changes:
        if options.chords:
            row = "%s (bar %d)" % (format_state_as_chord(st), time/bar)
        elif options.analysis:
            row = "%s (bar %d)" % (format_state(st), time/bar)
        elif options.raphsto:
            row = format_state_as_raphsto(st, (time/bar))
        else:
            row = "%s%s(bar %d)" % (format_state(st).ljust(15),
                                    format_state_as_chord(st).ljust(7),
                                    time/bar)
        report.append(row)
    print("\n".join(report))

    if options.realize is not None:
        # Realize as a midi file
        realizer = ChordSequenceRealizer(states,
                                         resolution=mid.resolution,
                                         times=times,
                                         chord_length=options.time_unit,
                                         text_events=options.lyrics)
        midi_out = realizer.generate(overlay=mid, offset=options.tick_offset)
        write_midifile(midi_out, options.realize)
Example #7
0
def main():
    """Print a plain-text report about a trained Raphsto model.

    Command line: ``[options] <model-name>``.  The class for
    ``-m/--model-type`` is taken from ``MODEL_TYPES`` and the named model is
    loaded with ``load_model``.  The report starts with the model type, name
    and today's date, followed by the emission distribution.  For every
    model type except ``unigram`` it continues with the key transition,
    chord transition and key-change chord distributions.  It always ends
    with ``model.history`` and ``model.description``.

    Exits with status 1 (message on stderr) when the model name is missing
    or the model type is unknown.
    """
    parser = OptionParser(
        usage="%prog [options] <model-name>",
        description="Outputs information about a trained Raphsto model")
    parser.add_option(
        '-m', '--model-type', dest="model_type", action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(MODEL_TYPES.keys()),
        default="standard")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write("You must specify a model name as the first argument\n")
        sys.exit(1)
    model_name = arguments[0]

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write(("Model type must be one of: %s" % ", ".join(MODEL_TYPES)) + "\n")
        sys.exit(1)

    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    print("Raphael & Stoddard trained model, %s: %s" % (options.model_type, model_name))
    print("as on %s" % datetime.now().strftime("%a %d %b %Y"))

    def ranked(dist):
        # (probability, sample) pairs, most probable first
        pairs = [(dist.prob(outcome), outcome) for outcome in dist.samples()]
        pairs.sort()
        pairs.reverse()
        return pairs

    # The unigram model type gets the emission distribution only
    emission_only = options.model_type == "unigram"

    # --- Emission distribution ---
    print("")
    print("Emission distribution")
    note_roles = {
        0: 'chord root',
        1: 'chord 3rd',
        2: 'chord 5th',
        3: 'other scale note',
        4: 'non-scale note',
    }
    beat_labels = {
        0: '1st beat',
        1: '3rd beat',
        2: '2nd or 4th beat',
        3: 'off beat',
    }
    for beat_cat in model.emission_dist.conditions():
        print(" Beat category: %s (%s)" % (beat_cat, beat_labels.get(beat_cat, '?')))
        emissions = model.emission_dist[beat_cat]
        for degree in emissions.samples():
            entry = ("D = %s: %.5f" % (degree, emissions.prob(degree))).ljust(15)
            print("   %s(%s)" % (entry, note_roles.get(degree, '?')))

    if not emission_only:
        # --- Key transitions ---
        print("\n\nTransition distributions")
        print("Key transition distribution")
        for prev_mode in model.key_transition_dist.conditions():
            print(" Previous mode: %s" % constants.MODE_NAMES[prev_mode])
            for weight, key_mode in ranked(model.key_transition_dist[prev_mode]):
                key_part = ("   key = %s (%s)," % (
                    constants.RELATIVE_TONIC_NAMES.get(key_mode[0], '?'),
                    key_mode[0])).ljust(20)
                print("%smode = %s: %.5f" % (key_part, constants.MODE_NAMES[key_mode[1]], weight))

        # --- Chord transitions ---
        print("\nChord transition distribution")
        for prev_chord in model.chord_transition_dist.conditions():
            print(" Previous chord: %s" % constants.CHORD_NAMES[prev_chord])
            for weight, chord in ranked(model.chord_transition_dist[prev_chord]):
                print("   %s%.5f" % (("%s:" % constants.CHORD_NAMES[chord]).ljust(5), weight))

        # --- Chord distribution used on key change ---
        print("\nKey change chord dist")
        for weight, chord in ranked(model.chord_dist):
            print(" %s%.5f" % (("%s:" % constants.CHORD_NAMES[chord]).ljust(5), weight))

    print("\n=======================")
    print("Model training history:")
    print(model.history)
    print("\n=============")
    print("Description:")
    print(model.description)
Example #8
0
def main():
    """
    Command-line entry point: train a model for the RaphSto chord labeller.

    The command line must supply a model name followed by an input file
    that lists the midi training data. The emission stream of each input
    is optionally truncated (--max-length), split into shorter sequences
    (--split-length) and filtered by length (--min-length). A model of the
    selected type is then initialized and trained on the resulting
    sequences, once per partition if --partitions is given.

    Exits with status 0 after printing help for '--opts help' or
    '--chord-set help', and with status 1 if the arguments are invalid
    (too few arguments, non-positive --split-length, unknown model type
    or unknown chord set).
    """
    def report(message):
        # Write a one-line status or error message to stderr
        sys.stderr.write(message + "\n")

    usage = "%prog [options] <model_name> <input-file>"
    description = "Trains a model for the RaphSto chord labelling "\
        "algorithm on a file that contains a list of midi files with "\
        "training options"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. "
             "Trains a model on the complement of each partition, "
             "so it can be tested on the partition. "
             "The models will be named <NAME>n, where <NAME> is "
             "the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="opts", action="store",
        help="options to pass to the model trainer. "
             "Type '--opts help' for a list of options for a "
             "particular model type.")
    parser.add_option(
        '--proc', '--processes', dest="processes", action="store",
        type="int",
        help="number of parallel processes to spawn for the training. "
             "Use -1 to spawn one per training sequence "
             "(after splitting: see split_length)",
        default=1)
    parser.add_option(
        '--max-length', dest="max_length", action="store", type="int",
        help="limits the length of the training midi sequences in chunks")
    parser.add_option(
        '--split-length', dest="split_length", action="store", type="int",
        help="limits the length of the training midi sequences in chunks, "
             "but instead of throwing away everything after the first N "
             "chunks, splits it off as if it were starting a new sequence. "
             "This is good for multiprocessing, since many short sequences "
             "can be multitasked, whilst few long ones cannot")
    parser.add_option(
        '--min-length', dest="min_length", action="store", type="int",
        help="ignores any sequences under this number of chunks. "
             "This is useful with --split-length, which can leave very "
             "short sequences from the end of a split sequence")
    parser.add_option(
        '--progress-out', dest="progress_out", action="store",
        help="output logging info to a file instead of the command line")
    parser.add_option(
        '--init-model', dest="init_model", action="store",
        help="initialize the model using parameters from an already trained model")
    parser.add_option(
        '--init-ctrans', dest="init_ctrans", action="store",
        help="initialize the chord transition distribution using these "
             "parameters. Comma-separated list of params given as "
             "C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is "
             "a float probability")
    parser.add_option(
        '--chord-set', dest="chord_set", action="store",
        help="use a chord set other than the default. "
             "Use value 'help' to see a list. "
             "Has no effect in combination with --init-model, since the "
             "old model's chord set will be used")
    parser.add_option(
        '-m', '--model-type', dest="model_type", action="store",
        help="select a model type: one of %s (default: standard)" %
             ", ".join(MODEL_TYPES.keys()),
        default="standard")
    options, arguments = parse_args_with_config(parser)

    # '--opts help' only lists the trainer options and stops
    if options.opts == "help":
        print(options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs"))
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)

    if len(arguments) < 2:
        report("You must specify a model name and an input data file as arguments")
        sys.exit(1)
    if options.split_length is not None and options.split_length < 1:
        # A zero or negative split length would make the splitting loop
        # below spin forever (the remaining sequence never gets shorter),
        # so reject it before doing any work
        report("--split-length must be at least 1 (got %d)" % options.split_length)
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    report("Raphsto training beginning at %s" % datetime.now().isoformat(' '))
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        report("Outputing logging info to %s" % logfile)
    else:
        stdout = True
        logfile = None
        report("Outputing logging to stdout")
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")

    if options.model_type not in MODEL_TYPES:
        report("Model type must be one of: %s" % ", ".join(MODEL_TYPES))
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    if options.chord_set == "help":
        print("Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys()))
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            report("Chord set '%s' does not exist" % options.chord_set)
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)

    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i, mh in enumerate(handlers):
        logger.info("%s: %s" % (i, midis.inputs[i][0]))
        # Only the first element of the emission stream result is used
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" %
                        (i, options.max_length, len(emissions)))
            emissions = emissions[:options.max_length]
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer "
                        "than %d chunks" % (i, options.split_length))
            # Split up the sequence if it's too long: peel full-length
            # pieces off the front until what is left fits
            while len(emissions) > options.split_length:
                training_data.append(emissions[:options.split_length])
                emissions = emissions[options.split_length:]
        # Whatever remains (or the whole sequence if unsplit) is kept too
        training_data.append(emissions)

    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info("Threw away %d short sequences (below %d chunks)" %
                        ((before_chuck - len(training_data)), options.min_length))

    logger.info("Training on %d sequences. Lengths: %s" %
                (len(training_data),
                 ", ".join(str(len(seq)) for seq in training_data)))

    if options.partitions is not None:
        # One model per partition, named <model_name><partition number>
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)]
    else:
        models = [(model_name, training_data)]

    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        # NOTE(review): this counts all sequences, even when each partitioned
        # model is trained on a subset - confirm this is what the trainer wants
        processes = len(training_data)
    else:
        processes = options.processes

    for part_name, data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" %
                        options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model,
                                                        model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: "
                        "%s, %s, %s" % ctype_params)
            init_kwargs = {'model_name': part_name}
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs['chord_set'] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)

            # Initialize the chord transition probabilities if given
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s"
                            % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    report("Training terminating at %s" % datetime.now().isoformat(' '))
Example #9
0
def main():
    """
    Command-line entry point: print a summary of a trained Raphsto model.

    Takes the model name as the first command-line argument and the model
    type via -m/--model-type. Loads the model and prints its emission
    distribution, then (for every type except "unigram") the key
    transition, chord transition and key-change chord distributions,
    followed by the model's training history and description.

    Exits with status 1 if no model name is given or the model type is
    not one of MODEL_TYPES.
    """
    def fail(message):
        # Report a usage problem on stderr and stop with an error status
        sys.stderr.write(message + "\n")
        sys.exit(1)

    def ranked(dist):
        # (probability, sample) pairs of a distribution, highest first
        pairs = [(dist.prob(outcome), outcome) for outcome in dist.samples()]
        pairs.sort()
        pairs.reverse()
        return pairs

    parser = OptionParser(
        usage="%prog [options] <model-name>",
        description="Outputs information about a trained Raphsto model")
    type_help = "select a model type: one of %s (default: standard)" % \
        ", ".join(MODEL_TYPES.keys())
    parser.add_option('-m', '--model-type', dest="model_type",
                      action="store", help=type_help, default="standard")
    options, arguments = parser.parse_args()

    if not arguments:
        fail("You must specify a model name as the first argument")
    model_name = arguments[0]

    if options.model_type not in MODEL_TYPES:
        fail("Model type must be one of: %s" % ", ".join(MODEL_TYPES))

    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    print("Raphael & Stoddard trained model, %s: %s" % (options.model_type,
                                                        model_name))
    print("as on %s" % datetime.now().strftime("%a %d %b %Y"))

    # The unigram model type only gets its emission distribution shown
    show_em = True
    show_ktrans = show_ctrans = show_chord = options.model_type != "unigram"

    if show_em:
        print("")
        print("Emission distribution")
        # Human-readable labels for the sampled values (D)
        d_labels = {
            0: 'chord root',
            1: 'chord 3rd',
            2: 'chord 5th',
            3: 'other scale note',
            4: 'non-scale note',
        }
        # Human-readable labels for the conditioning beat categories
        beat_labels = {
            0: '1st beat',
            1: '3rd beat',
            2: '2nd or 4th beat',
            3: 'off beat',
        }
        for cond in model.emission_dist.conditions():
            print(" Beat category: %s (%s)" % (cond, beat_labels.get(cond, '?')))
            emissions = model.emission_dist[cond]
            for samp in emissions.samples():
                entry = ("D = %s: %.5f" % (samp, emissions.prob(samp))).ljust(15)
                print("   %s(%s)" % (entry, d_labels.get(samp, '?')))

    if show_ktrans:
        print("\n\nTransition distributions")
        print("Key transition distribution")
        for cond in model.key_transition_dist.conditions():
            print(" Previous mode: %s" % constants.MODE_NAMES[cond])
            for prob, samp in ranked(model.key_transition_dist[cond]):
                key_text = ("   key = %s (%s)," % (
                    constants.RELATIVE_TONIC_NAMES.get(samp[0], '?'),
                    samp[0])).ljust(20)
                print("%smode = %s: %.5f" % (
                    key_text, constants.MODE_NAMES[samp[1]], prob))

    if show_ctrans:
        print("\nChord transition distribution")
        for cond in model.chord_transition_dist.conditions():
            print(" Previous chord: %s" % constants.CHORD_NAMES[cond])
            for prob, samp in ranked(model.chord_transition_dist[cond]):
                chord_text = ("%s:" % constants.CHORD_NAMES[samp]).ljust(5)
                print("   %s%.5f" % (chord_text, prob))

    if show_chord:
        print("\nKey change chord dist")
        for prob, samp in ranked(model.chord_dist):
            chord_text = ("%s:" % constants.CHORD_NAMES[samp]).ljust(5)
            print(" %s%.5f" % (chord_text, prob))

    print("\n=======================")
    print("Model training history:")
    print(model.history)
    print("\n=============")
    print("Description:")
    print(model.description)