예제 #1
0
def main():
    """Print statistics about the notes in a MIDI file.

    Reads the MIDI file named by the first positional argument and prints
    its format, its resolution, the number of notes, the number of notes
    left once drums are removed, and the number of occurrences of each
    pitch class (most frequent first).

    If -k/--key-profile is given, the relative frequency of each pitch
    class, counted upwards from that key, is written to key_profile.csv.
    A Gnuplot script plotting it is written to key_profile.p and Gnuplot is
    run on that script.

    Exits with status 1 if no input file is given.
    """
    usage = "%prog [options] <in-file>"
    description = "Print out stats about the notes in a MIDI file"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-k', '--key-profile', dest="key_profile", action="store", type="int", help="output a graph of the key profile for the given key as a gnuplot script")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) == 0:
        print("No input MIDI file given")
        sys.exit(1)
    filename = arguments[0]

    # Load the midi file
    midi = read_midifile(filename)
    print("Midi file type %d" % midi.format)
    print("Resolution: %d" % midi.resolution)
    print("%d notes" % len(note_ons(midi)))
    # Get rid of drums
    midi = simplify(midi, remove_drums=True)
    notes = note_ons(midi)
    print("%d non-drum notes" % len(notes))

    # Count the occurrences of each of the 12 pitch classes
    pcs = dict.fromkeys(range(12), 0)
    for note in notes:
        pcs[note.pitch % 12] += 1

    # Indexed by pitch class number
    note_names = ["C", "C#", "D", "D#", "E", "F",
                  "F#", "G", "G#", "A", "A#", "B"]
    # Print the pitch classes, most frequent first
    for pc, count in reversed(sorted(pcs.items(), key=lambda x: x[1])):
        print("%s: %d" % (note_names[pc], count))

    if options.key_profile is not None:
        kp_output_file = "key_profile"
        pc_names = ["1", "#1/b2", "2", "#2/b3", "3", "4", "#4/b5", "5",
                    "#5/b6", "6", "#6/b7", "7"]
        key = options.key_profile

        # Get the pc frequencies, relative to the requested key. The total
        # is computed once; a file with no (non-drum) notes gets an all-zero
        # profile rather than a division by zero.
        total = sum(pcs.values())
        if total:
            pc_freq = [float(pcs[(key + p) % 12]) / total for p in range(12)]
        else:
            pc_freq = [0.0] * 12

        # Output them as tab-separated rows: index, scale degree, frequency
        data = "\n".join(
            "%d\t%s\t%f" % (i, name, freq)
            for i, (name, freq) in enumerate(zip(pc_names, pc_freq)))
        with open("%s.csv" % kp_output_file, 'w') as f:
            f.write(data)

        # Output the Gnuplot script. The file names are derived from
        # kp_output_file so the script, the data and the final message
        # cannot drift apart.
        gnuplot = """\
set style data lines
set nokey
set xrange [-1:13]
set terminal pdf monochrome
set output "%(base)s.pdf"
set xlabel "Pitch class"
plot "%(base)s.csv" using 1:3:xticlabel(2)
""" % {"base": kp_output_file}
        with open("%s.p" % kp_output_file, 'w') as f:
            f.write(gnuplot)

        # Run Gnuplot
        call(["gnuplot", "%s.p" % kp_output_file])
        print("Gnuplot plot output to %s.p and %s.pdf" % (kp_output_file, kp_output_file))
예제 #2
0
def main():
    """Generate a chord sequence from a trained PCFG model and print it.

    The model name is the first positional argument; the script exits with
    status 1 if it is missing. -g selects a named grammar and -d switches
    the stderr logger from WARN to DEBUG level.
    """
    parser = OptionParser(
        usage="%prog <model-name>",
        description="Generate chord sequences from a PCFG model")
    parser.add_option(
        "-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    parser.add_option(
        "-d", "--debug", dest="debug", action="store_true",
        help="output debugging information during generation")
    options, arguments = parse_args_with_config(parser)

    # Debugging output is only shown when it was asked for
    log_level = logging.DEBUG if options.debug else logging.WARN
    logger = create_plain_stderr_logger(log_level=log_level)

    if not arguments:
        print("Specify a model name")
        sys.exit(1)
    model_name = arguments[0]

    # The model class comes from the grammar's formalism
    model_cls = get_grammar(options.grammar).formalism.PcfgModel
    # Load the trained model
    model = model_cls.load_model(model_name)

    sequence = model.generate(logger=logger)
    if sequence is not None:
        print(sequence)
    else:
        print("Model did not generate a sequence")
예제 #3
0
def main():
    """Command-line entry point: sample one sequence from a PCFG model.

    Takes the model name as its only positional argument (exit status 1 if
    absent), loads that model through the selected grammar's formalism and
    prints whatever the model generates, or a notice if it generates
    nothing.
    """
    opt_parser = OptionParser(usage="%prog <model-name>",
                              description="Generate chord sequences from a PCFG model")
    opt_parser.add_option("-g", "--grammar",
                          dest="grammar",
                          action="store",
                          help="use the named grammar instead of the default.")
    opt_parser.add_option("-d", "--debug",
                          dest="debug",
                          action="store_true",
                          help="output debugging information during generation")
    options, arguments = parse_args_with_config(opt_parser)

    # Log to stderr: everything when debugging, otherwise warnings only
    level = logging.WARN
    if options.debug:
        level = logging.DEBUG
    logger = create_plain_stderr_logger(log_level=level)

    if len(arguments) == 0:
        print("Specify a model name")
        sys.exit(1)
    name = arguments[0]

    grammar = get_grammar(options.grammar)
    # Load the trained model using the formalism's model class
    trained = grammar.formalism.PcfgModel.load_model(name)

    generated = trained.generate(logger=logger)
    if generated is None:
        print("Model did not generate a sequence")
        return
    print(generated)
예제 #4
0
def main():
    """Render a Harmonical input file and play it back or save it.

    The input file is the first positional argument; the script exits with
    status 1 (instead of failing with an IndexError) if it is missing.

    With -m/--midi the input is rendered as midi data, otherwise as audio.
    With -o/--output the result is written to the named file, otherwise it
    is played back and the call blocks until playback ends. Progress
    messages go to stderr.
    """
    usage = "%prog [options] <in-file>"
    description = "Play music using the Harmonical. This allows you to "\
        "play music specified precisely in the tonal space. By default, "\
        "plays back the input, but can also output to a file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-o', '--output', dest="outfile", action="store", help="output the result to a wave file instead of playing back.")
    parser.add_option('-m', '--midi', dest="midi", action="store_true", help="generate midi data, not audio. Depends on the input format supporting midi file generation.")
    options, arguments = parse_args_with_config(parser)

    # Fail with a clear message rather than an IndexError traceback
    if len(arguments) == 0:
        sys.stderr.write("No input file given\n")
        sys.exit(1)
    filename = arguments[0]

    # Load up the input file
    infile = HarmonicalInputFile.from_file(filename)
    if options.midi:
        midi = infile.render_midi()

        if options.outfile is not None:
            # Output a midi file
            write_midifile(midi, options.outfile)
            sys.stderr.write("Saved midi data to %s\n" % options.outfile)
        else:
            sys.stderr.write("Playing...\n")
            play_stream(midi, block=True)
    else:
        sys.stderr.write("Generating audio...\n")
        audio = infile.render()

        if options.outfile is not None:
            # Output to a file instead of playing
            save_wave_data(audio, options.outfile)
            sys.stderr.write("Saved data to %s\n" % options.outfile)
        else:
            sys.stderr.write("Playing...\n")
            play_audio(audio, wait_for_end=True)
예제 #5
0
def main():
    """Filter a midi file and write the result to a new file.

    Expects two positional arguments, the input and output filenames, and
    exits with status 1 if either is missing. Each flag option selects one
    filter passed to simplify(); -i/--invert flips every one of those
    selections. --remove-duplicates is passed through as given and is not
    affected by --invert. Channels named with -r are removed before the
    other filters run, and --resolution is applied when the file is read.
    """
    description = "Cleans up a midi file by getting rid of a load of "\
            "stuff that makes the music sound good, but isn't needed "\
            "by our algorithms. See options for details."
    parser = OptionParser(usage="%prog [options] <input-midi> <output-filename>",
                          description=description)

    # Simple on/off switches: (option strings, destination, help text)
    flag_options = [
        (('-d', '--remove-drums'), "remove_drums",
            "filter out drum tracks"),
        (('-p', '--pc', '--remove-program-change'), "remove_pc",
            "filter out all program change (instrument) events"),
        (('-x', '--remove-text', '--txt'), "remove_text",
            "filter out all text events of any type"),
        (('-o', '--one-track'), "one_track",
            "reduce everything down to one track"),
        (('-t', '--remove-tempo'), "remove_tempo",
            "remove all tempo events"),
        (('-c', '--remove-control'), "remove_control",
            "remove all control change events"),
        (('--ch', '--one-channel'), "one_channel",
            "use only one channel: every event occurs on channel 0"),
        (('--mc', '--remove-misc-control'), "remove_misc_control",
            "filter out a whole load of device control events: aftertouch, channel aftertouch, pitch wheel, sysex, port"),
        (('--rno', '--real-note-offs'), "real_note_offs",
            "replace 0-velocity note-ons with actual note-offs. Some midi files use one, some the other"),
        (('--remove-duplicates',), "remove_duplicates",
            "tidy up at the end to remove any duplicate notes"),
        (('-i', '--invert'), "invert",
            "inverts all options. I.e. applies all filters except those selected by the above options"),
    ]
    for opt_strings, dest, help_text in flag_options:
        parser.add_option(*opt_strings, dest=dest, action="store_true",
                          help=help_text, default=False)
    parser.add_option('-r', '--remove-channels', dest="remove_channels",
                      action="append", type="int",
                      help="filter out all events of the numbered channel. Use multiple options to filter multiple channels at once")
    parser.add_option('--resolution', '--res', dest="resolution",
                      action="store", type="int",
                      help="change the resolution of the midi data from that read in from the file to that given")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        sys.stderr.write("You must specify an input and output filename\n")
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])
    out_filename = os.path.abspath(arguments[1])

    # Read in the midi file
    mid = read_midifile(in_filename, force_resolution=options.resolution)

    # Build the kwargs for simplify(): each filter is on if it was selected,
    # or, with --invert, if it was not selected
    invert = options.invert
    filters = {}
    for kwarg, selected in [
            ('remove_drums', options.remove_drums),
            ('remove_pc', options.remove_pc),
            ('remove_all_text', options.remove_text),
            ('one_track', options.one_track),
            ('remove_tempo', options.remove_tempo),
            ('remove_control', options.remove_control),
            ('one_channel', options.one_channel),
            ('remove_misc_control', options.remove_misc_control),
            ('real_note_offs', options.real_note_offs)]:
        filters[kwarg] = selected ^ invert

    channels = options.remove_channels
    print("Filters to be applied:")
    if channels is not None:
        print("  removing channels: %s" % ", ".join(str(ch) for ch in channels))
    if options.resolution is not None:
        print("  changing resolution to %d" % options.resolution)
    active = ["  %s" % name for (name, val) in filters.items() if val]
    print("\n".join(active))
    # Added after the summary above, so it is not listed and not inverted
    filters['remove_duplicates'] = options.remove_duplicates

    print("Filtering...")
    # Apply channel filters first
    if channels is not None:
        remove_channels(mid, channels)
    filtered = simplify(mid, **filters)
    print("Midi output to %s" % out_filename)
    write_midifile(filtered, out_filename)
예제 #6
0
def main():
    """Split a midi file into chunks and print each chunk's note-on events.

    The midi file is the first positional argument; the script exits with
    status 1 (instead of failing with an IndexError) if it is missing.

    The file is divided into slices by MidiHandler using the --time-unit
    and --tick-offset options. For every slice a header is printed,
    followed by one row per note-on event with non-zero velocity. Each row
    shows the note name indented by its octave, its time (a percentage of
    the chunk length, or an absolute tick with -i), velocity, channel and
    track.
    """
    import sys

    usage = "%prog [options] <midi-file>"
    description = "Divides a midi file into chunks, with a given size and "\
        "offset, and print the chunks consecutively."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-t', '--time-unit', dest="time_unit", action="store", type="float", help="size of chunks in crotchet beats (according to the midi file's resolution)", default=4)
    parser.add_option('-o', '--tick-offset', dest="tick_offset", action="store", type="int", help="offset of the first chunk in midi ticks", default=0)
    parser.add_option('--force-res', dest="force_res", action="store", type="int", help="force the midi file's resolution to be the given value, rather than using that read from the file")
    parser.add_option('-i', "--tick-times", dest="tick_times", action="store_true", help="show times as tick values, rather than proportions of the chunk")
    options, arguments = parse_args_with_config(parser)

    # Fail with a clear message rather than an IndexError traceback
    if len(arguments) == 0:
        sys.stderr.write("You must specify an input midi file\n")
        sys.exit(1)
    filename = arguments[0]

    # Load up the input midi file
    infile = read_midifile(filename, force_resolution=options.force_res)
    handler = MidiHandler(infile,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset)
    slices = handler.get_slices()

    # %g rather than %d: the time unit is a float option and a fractional
    # value (e.g. 0.5) must not be displayed truncated to an integer
    print("Printing %g-beat chunks with a %d-tick offset" % (options.time_unit, options.tick_offset))
    print("Total chunks: %d" % len(slices))
    print("")

    # Time unit multiplied by the file's resolution; the denominator for
    # the percentage times below
    chunk_length = options.time_unit * infile.resolution

    # Column header, identical for every chunk: octave numbers 0-10 in
    # two-character columns, then the remaining column titles
    column_header = " ".join([
        "".join(str(octave_num).ljust(2) for octave_num in range(11)),
        "Time   ", "Vel", "Ch", "Tr"])

    for i, slc in enumerate(slices):
        strm = slc.to_event_stream()
        # Print the header for this chunk
        print("Chunk %d: %d-%d (%d events)" %
              (i, slc.start, slc.end, len(strm.trackpool)))
        print(column_header)

        # Only show note-on events
        noteons = [ev for ev in sorted(strm.trackpool)
                   if type(ev) == NoteOnEvent and ev.velocity > 0]
        # Within each run of events that compare equal, sort by pitch.
        # NOTE(review): groupby is given no key, so the grouping relies on
        # how the event class defines equality - confirm that this groups
        # events by tick as intended.
        for k, grp in groupby(noteons):
            for ev in sorted(grp, key=lambda e: e.pitch):
                # Display all the information for this note
                octave = ev.pitch // 12
                name = constants.NOTE_NAMES[ev.pitch % 12].ljust(2)
                # Place the note name in the column of its octave
                indent = "  " * octave
                fill = "  " * (10 - octave)
                if options.tick_times:
                    time_str = str(ev.tick + slc.start).ljust(7)
                else:
                    time_str = ("%.1f%%" % (100.0 * ev.tick / chunk_length)).ljust(7)
                channel = str(ev.channel).ljust(2)
                track = str(ev.track).ljust(2)
                velocity = str(ev.velocity).ljust(3)

                print("%s%s%s %s %s %s %s" %
                      (indent, name, fill, time_str, velocity, channel, track))
        print("")
예제 #7
0
def main():
    """Train and save one or more backoff builder models.

    Positional arguments are the model type, the model name and the input
    data file, in that order; the script exits with status 1 if fewer than
    three are given. '--opts help' prints the training options of the
    model class and exits with status 0.

    Without -p a single model is trained on the whole input. With -p the
    input is passed to holdout_partition() and one model is trained and
    saved for each element of its result.
    """
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage="%prog [options] <model-type> <model_name> <in-file>",
                          description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and an input data file as arguments\n")
        sys.exit(1)
    model_type, model_name = arguments[0], arguments[1]
    filename = os.path.abspath(arguments[2])

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from the input file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is not None and opt_string.lower() == "help":
        print(options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    if opt_string is None:
        training_opts = {}
    else:
        parsed_opts = ModuleOption.process_option_string(opt_string)
        training_opts = ModuleOption.process_option_dict(
            parsed_opts, model_cls.TRAINING_OPTIONS)

    # Work out which (model name, training data) pairs to train
    models = []
    if options.partitions is None:
        models.append((model_name, input_data))
    else:
        parts = holdout_partition(input_data, options.partitions)
        for num, seqs in enumerate(parts):
            models.append((builder_cls.partition_model_name(model_name, num), seqs))

    for part_name, seqs in models:
        # Instantiate a fresh model with this name, train it and store it
        model = model_cls(part_name, options=training_opts)
        model.train(seqs)
        model.save()
        print("Trained model %s" % (part_name))
예제 #8
0
def main():
    """Split a midi file into chunks and play them one after another.

    The midi file is the first positional argument; the script exits with
    status 1 (instead of failing with an IndexError) if it is missing.

    The file is divided into slices by MidiHandler using the --time-unit
    and --tick-offset options, and playback begins at the chunk given by
    --start. Each chunk is announced by its position in the whole file,
    optionally followed by its events (-p) or only its note-on events
    (--pno), then played. With a positive --gap the script sleeps that many
    seconds after each chunk. Ctrl+C stops playback cleanly.
    """
    import sys

    usage = "%prog [options] <midi-file>"
    description = "Divides a midi file into chunks, with a given size and "\
        "offset, and plays "\
        "the chunks consecutively, with a gap between each."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-t', '--time-unit', dest="time_unit", action="store", type="float", help="size of chunks in crotchet beats (according to the midi file's resolution)", default=4)
    parser.add_option('-o', '--tick-offset', dest="tick_offset", action="store", type="int", help="offset of the first chunk in midi ticks", default=0)
    parser.add_option('-g', '--gap', dest="gap", action="store", type="float", help="time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value", default=0.0)
    parser.add_option('-p', '--print', dest="print_events", action="store_true", help="print out all events for each chunk")
    parser.add_option('--pno', '--print-note-ons', dest="print_note_ons", action="store_true", help="print only note-on events")
    parser.add_option('--force-res', dest="force_res", action="store", type="int", help="force the midi file's resolution to be the given value, rather than using that read from the file")
    parser.add_option('-s', '--start', dest="start", action="store", type="int", help="chunk number to start at", default=0)
    options, arguments = parse_args_with_config(parser)

    # Fail with a clear message rather than an IndexError traceback
    if len(arguments) == 0:
        sys.stderr.write("You must specify an input midi file\n")
        sys.exit(1)
    filename = arguments[0]

    # Load up the input midi file
    infile = read_midifile(filename, force_resolution=options.force_res)
    handler = MidiHandler(infile,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset)
    all_slices = handler.get_slices()

    # Start at the requested chunk
    slices = all_slices[options.start:]
    # Number of chunks skipped by the slicing above. Derived from the
    # lengths so that it is also right for a negative or too-large --start.
    first_chunk = len(all_slices) - len(slices)

    # %g rather than %d: the time unit is a float option and a fractional
    # value (e.g. 0.5) must not be displayed truncated to an integer
    print("Playing %g-beat chunks with a %d-tick offset" % (options.time_unit, options.tick_offset))
    if options.start > 0:
        print("Start from chunk %d" % options.start)
    print("Total chunks: %d" % len(slices))
    print("Ctrl+C to exit")
    print("")

    try:
        for i, slc in enumerate(slices):
            # Report the chunk's number within the whole file, so that it
            # matches the numbering used by --start
            chunk_num = first_chunk + i
            strm = slc.to_event_stream(cancel_playing=True)
            print("Playing chunk %d: %d-%d (%d events)" % (chunk_num, slc.start, slc.end, len(strm.trackpool)))
            if options.print_events:
                print("\n".join("  %s" % ev for ev in sorted(strm.trackpool)))
            elif options.print_note_ons:
                print("\n".join("  %s" % ev for ev in sorted(strm.trackpool)
                                if type(ev) is NoteOnEvent))

            play_stream(strm, block=True)
            if options.gap > 0.0:
                print("  Waiting %s seconds..." % options.gap)
                time.sleep(options.gap)
    except KeyboardInterrupt:
        print("Exiting")
예제 #9
0
파일: del.py 프로젝트: johndpope/jazzparser
def main():
    """Delete a stored PCFG model, or every partition of a partitioned one.

    The model name is the first positional argument. If it is missing, the
    available models are listed on stderr and the script exits with
    status 1. With -p N the models named <model-name>0 .. <model-name>N-1
    are deleted instead of <model-name> itself.

    All models are checked for existence before any is deleted. If one is
    missing, an error is written to stderr and the script exits with
    status 1 without deleting anything.
    """
    usage = "%prog [<options>] <model-name>"
    description = "Delete a PCFG model"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int",
                      help="Number of partitions the model is divided into")
    parser.add_option("-g", "--grammar", dest="grammar", action="store",
                      help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
    # List the stored models once, rather than again for every partition
    available = PcfgModel.list_models()

    if len(arguments) == 0:
        sys.stderr.write("Specify a model name\n")
        sys.stderr.write("Available models: %s\n" % ", ".join(available))
        sys.exit(1)
    model_name = arguments[0]
    print("Model base name: %s" % model_name)

    # Work out the names of the models to delete
    if options.partitions is not None:
        part_names = ["%s%d" % (model_name, i)
                      for i in range(options.partitions)]
    else:
        part_names = [model_name]

    # First check all the models exist, so a bad name deletes nothing
    for part_model in part_names:
        if part_model not in available:
            # Errors go to stderr, like the missing-argument message above
            sys.stderr.write("The model '%s' does not exist\n" % part_model)
            sys.exit(1)

    # Now delete them one by one
    for part_model in part_names:
        # Load the model
        model = PcfgModel.load_model(part_model)
        model.delete()
        print("Removed model: %s" % part_model)
예제 #10
0
def main():
    """Load a PCFG model and an input sequence and build the sequence's tree.

    Positional arguments are the model name and the input file. The script
    exits with status 1 if either is missing. The input is read with
    command_line_input() as filetype "db", and a tree is built from its
    sequence with build_tree_for_sequence().

    Raises:
        ModelTrainingError: if the tree cannot be built (wraps the
            TreeBuildError raised by build_tree_for_sequence()).
    """
    # The input file is a required argument, so it is named in the usage
    usage = "%prog [options] <model-name> <in-file>"
    description = "Debug a PCFG model"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-g", "--grammar", dest="grammar", action="store",
                      help="use the named grammar instead of the default.")
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="output debugging information during generation")
    parser.add_option("--file-options", "--fopt", dest="file_options",
                      action="store", help="options for the input file "
                      "(--file). Type '--fopt help' for a list of available "
                      "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        print("Specify a model name")
        sys.exit(1)
    model_name = arguments[0]

    if len(arguments) < 2:
        print("Specify an input file")
        # Stop here: arguments[1] below would otherwise raise an IndexError
        sys.exit(1)

    grammar = get_grammar(options.grammar)
    PcfgModel = grammar.formalism.PcfgModel
    # Load the trained model
    model = PcfgModel.load_model(model_name)

    # Try getting a file from the command-line options
    input_data = command_line_input(filename=arguments[1],
                                    filetype="db",
                                    options=options.file_options)

    # Prepare the input and annotations
    sequence = input_data.sequence
    categories = [chord.category for chord in sequence.iterator()]
    str_inputs = input_data.inputs
    # Build the implicit normal-form tree from the annotations
    try:
        tree = build_tree_for_sequence(sequence)
    except TreeBuildError as err:
        raise ModelTrainingError("could not build a tree for '%s': %s" %
                                 (sequence.string_name, err))
예제 #11
0
def main():
    """Dump every event of a midi file to stdout, track by track.

    The midi file is the first positional argument; the script exits with
    status 1 if it is missing. The file's format and resolution are
    printed first, then the events of each track in sorted order.
    """
    parser = OptionParser(
        usage="%prog [options] <in-file>",
        description="Dump a description of all the events in a midi file "
                    "to stdout.")
    options, arguments = parse_args_with_config(parser)

    if not arguments:
        print("No input MIDI file given")
        sys.exit(1)

    # Load the midi file
    midi = read_midifile(arguments[0])
    print("Midi file type %d" % midi.format)
    print("Resolution: %d" % midi.resolution)

    for track_num in range(len(midi)):
        print("\nTrack %d" % track_num)
        events = sorted(midi[track_num])
        for event in events:
            print("%s" % event)
예제 #12
0
def main():
    """Command-line entry point: list all events in a midi file.

    Reads the file named by the first positional argument (exit status 1
    if there is none), prints its format and resolution, and then prints
    each track's events in sorted order under a "Track N" heading.
    """
    usage_text = "%prog [options] <in-file>"
    about = "Dump a description of all the events in a midi file "\
        "to stdout."
    opt_parser = OptionParser(usage=usage_text, description=about)
    options, arguments = parse_args_with_config(opt_parser)

    if len(arguments) < 1:
        print("No input MIDI file given")
        sys.exit(1)
    midi_path = arguments[0]

    # Load the midi file
    midi = read_midifile(midi_path)
    header_lines = [
        "Midi file type %d" % midi.format,
        "Resolution: %d" % midi.resolution,
    ]
    for line in header_lines:
        print(line)

    num_tracks = len(midi)
    for index in range(num_tracks):
        print("\nTrack %d" % index)
        for ev in sorted(midi[index]):
            print("%s" % ev)
예제 #13
0
파일: del.py 프로젝트: johndpope/jazzparser
def main():
    """Delete a PCFG model, or all the partitions of a partitioned model.

    Takes the model name as the first positional argument. Without it, the
    names of the available models are written to stderr and the script
    exits with status 1. If -p N is given, the models deleted are
    <model-name>0 up to <model-name>N-1, rather than <model-name>.

    Nothing is deleted unless every model to be deleted exists. A missing
    model is reported on stderr and the script exits with status 1.
    """
    usage = "%prog [<options>] <model-name>"
    description = "Delete a PCFG model"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int",
                      help="Number of partitions the model is divided into")
    parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
    # Query the list of stored models a single time; it is needed both for
    # the help message and for the existence check
    existing_models = PcfgModel.list_models()

    if len(arguments) == 0:
        sys.stderr.write("Specify a model name\n")
        sys.stderr.write("Available models: %s\n" % ", ".join(existing_models))
        sys.exit(1)
    model_name = arguments[0]
    print("Model base name: %s" % model_name)

    # Names of the models to remove: one per partition, or just the model
    if options.partitions is None:
        targets = [model_name]
    else:
        targets = ["%s%d" % (model_name, i) for i in range(options.partitions)]

    # First check all the models exist
    for target in targets:
        if target not in existing_models:
            # Report on stderr, consistently with the other error above
            sys.stderr.write("The model '%s' does not exist\n" % target)
            sys.exit(1)

    # Now delete them one by one
    for target in targets:
        # Load the model
        model = PcfgModel.load_model(target)
        model.delete()
        print("Removed model: %s" % target)
예제 #14
0
def main():
    """Label the segments of MIDI input with chords using a stored model.

    Positional arguments are the model name and the input file; the script
    exits with status 1 if either is missing. '--lopt help' prints the
    labeler's options and exits with status 0.

    The input may be a single input or a bulk input; a single input is
    handled as a bulk input with one item. For each item the labels of
    every time segment are printed: only the first label with --single,
    otherwise every label with its probability.

    With -r/--realize the first label of each segment is realized as a midi
    file, overlaid on the input unless --chords-only is given. A single
    input is written to exactly the name given; the items of a bulk input
    are written to <name>-<index> so that they do not overwrite each other.
    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='segmidi')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Labeling options
    parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="append", help="options for the labeler. Type '--lopt help' for a list of available options.")
    parser.add_option("--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option("--single", "-1", dest="single", action="store_true", help="show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)")
    parser.add_option('-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input")
    parser.add_option('--chords-only', dest="chords_only", action="store_true", help="only realize the chords: don't overlay on the input midi (only works with -r)")
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and "help" in options.labeler_options:
        print(options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler"))
        sys.exit(0)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input "
                         "(MIDI) data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True

    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    # True only for a real bulk input. This flag used to be inverted, which
    # gave single inputs a "-0" suffix and made bulk items overwrite one
    # output file.
    bulk = is_bulk_type(type(input_data))
    if not bulk:
        # Handle a single input as a bulk input with one item
        input_data = [input_data]

    # Load the model once: it does not depend on the individual input
    model = HPChordLabeler.load_model(model_name)

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print("Read midi data in %d segments" % len(data))

        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger. The result is
        # not used here; the call is kept so that this path is exercised
        labs = model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print(", ".join(["%s" % timelabs[0][0] for timelabs in labels]))
        else:
            # Print out the labels for each timestep
            for timestep, timelabs in enumerate(labels):
                print("%d: %s" % (timestep,
                      ", ".join(["%s (%.2e)" % (label, prob) for (label, prob) in timelabs])))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print("Realizing output chord sequence")
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream, offset=data.tick_offset)

            if bulk:
                # One output file per item of the bulk input
                out_filename = "%s-%d" % (options.realize, i)
            else:
                out_filename = options.realize
            write_midifile(stream, out_filename)
예제 #15
0
def main():
    usage = "%prog [<options>] <model-name>"
    description = "Queries an ngram model interactively"
    optparser = OptionParser(usage=usage, description=description)
    # Read in command line options and args
    options, arguments = parse_args_with_config(optparser)
    
    if len(arguments) < 1:
        print "Specify a model name"
        sys.exit(1)
    model_name = arguments[0]
        
    # Load the ngram model
    ngmodel = NgramTaggerModel.load_model(model_name)
    model = ngmodel.model
    
    input_getter = input_iterator(">> ")
    # Load the shell history if possible
    try:
        readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE)
    except IOError:
        # No history file found. No problem
        pass
    print "N-gram model query"
    print "Loaded", model_name
    print
    print "Transition:      t <state> <state-1> ... <state-n>"
    print "Emission:        e <chord> <state>"
    print "State domain:    states"
    print "Emission domain: ems"
    
    def _check_state(s):
        if s not in model.label_dom+[None]:
            raise QueryError, "invalid state label: %s" % s
    
    for query in input_getter:
        query = query.rstrip("\n").strip()
        if query:
            try:
                if query.startswith("states"):
                    print ", ".join(model.label_dom)
                elif query.startswith("ems"):
                    print ", ".join(model.emission_dom)
                elif query.startswith("t"):
                    # Transition prob query
                    states = query.split()[1:]
                    if len(states) != model.order:
                        print "Ngram must have length %d" % model.order
                        continue
                    states = [s if s != "None" else None for s in states]
                    # Verify all these states
                    for state in states:
                        _check_state(state)
                    # Get the transition probability
                    prob = model.transition_probability_debug(*states)
                    print "P(Qi = %s | %s) = %f" % (states[0], 
                                    ", ".join(["Q(i-%d) = %s" % (i+1,s) 
                                        for (i,s) in enumerate(states[1:])]),
                                    prob)
                elif query.startswith("e"):
                    # Emission prob query
                    em_state = query.split()[1:]
                    if len(em_state) != 2:
                        print "Emission query must consist of a chord and a state"
                        continue
                    em, state = em_state
                    # Check the state label's valid
                    _check_state(state)
                    # Get the emission probability
                    prob = model.emission_probability(em, state)
                    # Print out the probability
                    print "P(Oi = %s | Qi = %s) = %f" % (em, state, prob)
                else:
                    print "Invalid query: %s" % query
            except QueryError, err:
                print "Check your query: %s" % err
            except Exception, err:
                print "Error processing query: %s" % err
예제 #16
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
예제 #17
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options"
    )
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="Output verbose logging information to stderr")
    parser.add_option("-g",
                      "--grammar",
                      dest="grammar",
                      action="store",
                      help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level=log_level, name="training", stderr=True)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel

    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS,
                                intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            PcfgModel.TRAINING_OPTIONS)

    if len(arguments) == 0:
        print >> sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >> sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name

    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i))
                 for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]

    if len(arguments) < 2:
        print >> sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])

    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]

    for dataset, (parti, part_model) in zip(datasets, parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model,
                                dataset,
                                opts,
                                grammar=grammar,
                                logger=logger)
        model.save()
        print "Trained model", part_model
예제 #18
0
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option(
        "-t",
        "--tagger",
        "--supertagger",
        dest="supertagger",
        action="store",
        help=
        "run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s"
        % settings.DEFAULT_SUPERTAGGER,
        default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option(
        "--topt",
        "--tagger-options",
        dest="topts",
        action="append",
        help=
        "specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options."
    )
    # Commonly-used misc
    optparser.add_option("-g",
                         "--grammar",
                         dest="grammar",
                         action="store",
                         help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option(
        "--file",
        "-f",
        dest="file",
        action="store",
        help=
        "use a file to get parser input from. Use --filetype to specify the type of the file."
    )
    optparser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords",
        default='chords')
    optparser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Misc options
    optparser.add_option("-v",
                         "--debug",
                         dest="debug",
                         action="store_true",
                         help="output verbose debugging information.")
    optparser.add_option(
        "-i",
        "--interactive",
        dest="interactive",
        action="store_true",
        help=
        "instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging"
    )
    # Logging options
    optparser.add_option(
        "--logger",
        dest="logger",
        action="store",
        help=
        "directory to put parser logging in. A filename based on an identifier for each individual input will be appended."
    )
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(
                tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
예제 #19
0
파일: tag.py 프로젝트: johndpope/jazzparser
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)
    
    # Tagger options
    optparser.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Commonly-used misc
    optparser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    optparser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    optparser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Misc options
    optparser.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    optparser.add_option("-i", "--interactive", dest="interactive", action="store_true", help="instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging")
    # Logging options
    optparser.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)
    
    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)
        
    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1
        
    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
예제 #20
0
def main():
    usage = "%prog [options] <midi-file>"
    description = "Divides a midi file into chunks, with a given size and "\
        "offset, and print the chunks consecutively."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-t',
        '--time-unit',
        dest="time_unit",
        action="store",
        type="float",
        help=
        "size of chunks in crotchet beats (according to the midi file's resolution)",
        default=4)
    parser.add_option('-o',
                      '--tick-offset',
                      dest="tick_offset",
                      action="store",
                      type="int",
                      help="offset of the first chunk in midi ticks",
                      default=0)
    parser.add_option(
        '--force-res',
        dest="force_res",
        action="store",
        type="int",
        help=
        "force the midi file's resolution to be the given value, rather than using that read from the file"
    )
    parser.add_option(
        '-i',
        "--tick-times",
        dest="tick_times",
        action="store_true",
        help="show times as tick values, rather than proportions of the chunk")
    options, arguments = parse_args_with_config(parser)

    filename = arguments[0]

    # Load up the input midi file
    infile = read_midifile(filename, force_resolution=options.force_res)
    handler = MidiHandler(infile,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset)
    slices = handler.get_slices()

    print "Printing %d-beat chunks with a %d-tick offset" % (
        options.time_unit, options.tick_offset)
    print "Total chunks: %d" % len(slices)
    print

    chunk_length = options.time_unit * infile.resolution

    for i, slc in enumerate(slices):
        strm = slc.to_event_stream()
        # Print the header for this chunk
        print "Chunk %d: %d-%d (%d events)" % \
                (i, slc.start, slc.end,len(strm.trackpool))
        print "".join(str(i).ljust(2) for i in range(11)), \
                "Time   ", "Vel", "Ch", "Tr"

        # Only show note-on events
        noteons = [ev for ev in sorted(strm.trackpool) \
                    if type(ev) == NoteOnEvent and ev.velocity > 0]
        # Sorted by time: within same tick, sort by pitch
        for k, grp in groupby(noteons):
            for ev in sorted(list(grp), key=lambda e: e.pitch):
                # Display all the information for this note
                octave = ev.pitch / 12
                name = constants.NOTE_NAMES[ev.pitch % 12].ljust(2)
                indent = "  " * octave
                fill = "  " * (10 - octave)
                if options.tick_times:
                    time = str(ev.tick + slc.start).ljust(7)
                else:
                    time = ("%.1f%%" %
                            (100.0 * ev.tick / chunk_length)).ljust(7)
                channel = str(ev.channel).ljust(2)
                track = str(ev.track).ljust(2)
                velocity = str(ev.velocity).ljust(3)

                print "%s%s%s %s %s %s %s" % \
                        (indent, name, fill, time, velocity, channel, track)
        print
예제 #21
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
예제 #22
0
def main():
    usage = "%prog [options] <in-file>"
    description = "Print out stats about the notes in a MIDI file"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-k',
        '--key-profile',
        dest="key_profile",
        action="store",
        type="int",
        help=
        "output a graph of the key profile for the given key as a gnuplot script"
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) == 0:
        print "No input MIDI file given"
        sys.exit(1)
    filename = arguments[0]

    # Load the midi file
    midi = read_midifile(filename)
    print "Midi file type %d" % midi.format
    print "Resolution: %d" % midi.resolution
    print "%d notes" % len(note_ons(midi))
    # Get rid of drums
    midi = simplify(midi, remove_drums=True)
    notes = note_ons(midi)
    print "%d non-drum notes" % len(notes)
    # Analyse the note content
    pcs = dict([(i, 0) for i in range(12)])
    for note in notes:
        pcs[note.pitch % 12] += 1

    note_names = dict([(0, "C"), (1, "C#"), (2, "D"), (3, "D#"), (4, "E"),
                       (5, "F"), (6, "F#"), (7, "G"), (8, "G#"), (9, "A"),
                       (10, "A#"), (11, "B")])
    # Print the notes
    for pc, count in reversed(sorted(pcs.items(), key=lambda x: x[1])):
        print "%s: %d" % (note_names[pc], count)

    if options.key_profile is not None:
        kp_output_file = "key_profile"
        pc_names = [
            "1", "#1/b2", "2", "#2/b3", "3", "4", "#4/b5", "5", "#5/b6", "6",
            "#6/b7", "7"
        ]
        # Output the pitch counts
        key = options.key_profile
        # Get the pc frequencies
        pc_freq = [
            float(pcs[(key + p) % 12]) / sum(pcs.values()) for p in range(12)
        ]
        # Output them to a CSV
        data = "\n".join("%d\t%s\t%f" % (i, name, freq)
                         for (name, freq,
                              i) in zip(pc_names, pc_freq, range(12)))
        with open("%s.csv" % kp_output_file, 'w') as f:
            f.write(data)
        # Output the Gnuplot script
        gnuplot = """\
set style data lines
set nokey
set xrange [-1:13]
set terminal pdf monochrome
set output "key_profile.pdf"
set xlabel "Pitch class"
plot "key_profile.csv" using 1:3:xticlabel(2)
"""
        with open("%s.p" % kp_output_file, 'w') as f:
            f.write(gnuplot)
        # Run Gnuplot
        call(["gnuplot", "%s.p" % kp_output_file])
        print "Gnuplot plot output to %s.p and %s.pdf" % (kp_output_file,
                                                          kp_output_file)
예제 #23
0
def main():
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-g',
        '--gap',
        dest="gap",
        action="store",
        type="float",
        help=
        "time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value",
        default=0.0)
    parser.add_option('-p',
                      '--print',
                      dest="print_events",
                      action="store_true",
                      help="print out all events for each chunk")
    parser.add_option('--pno',
                      '--print-note-ons',
                      dest="print_note_ons",
                      action="store_true",
                      help="print only note-on events")
    parser.add_option(
        '--fopt',
        dest="file_options",
        action="store",
        help=
        "options for file loading. Use '--fopt help' to see available options")
    options, arguments = parse_args_with_config(parser)

    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn
    input_getter = iter(input_data)
    segmidi = input_getter.next()

    while True:
        print "###############################"
        print "Playing '%s'" % segmidi.name
        print "%s-beat chunks with a %d-tick offset\n" % \
                                    (segmidi.time_unit, segmidi.tick_offset)
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print "Playing chunk %d: %d events" % (i, len(strm.trackpool))
                if options.print_events:
                    print "\n".join("  %s" % ev
                                    for ev in sorted(strm.trackpool))
                elif options.print_note_ons:
                    print "\n".join("  %s" % ev for ev in sorted(strm.trackpool) \
                                                    if type(ev) is NoteOnEvent)
                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            pass

        print "Continue to next song (<enter>); exit (x); play again (p)"
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "p":
            # Play again
            continue
        elif command == "":
            # Move to next
            segmidi = input_getter.next()
            continue
        else:
            print "Unknown command: %s" % command
            print "Playing again..."
            continue
    sys.exit(0)
예제 #24
0
def main():
    """
    Command-line entry point: plays the songs of a segmented bulk midi
    input one chunk at a time.

    The first positional argument names the input, which is loaded with
    C{command_line_input} using the C{bulk-segmidi} file type. Each input
    is turned into a list of chunks by iterating over it and every chunk
    is played with C{play_stream}, optionally printing its events first
    and pausing between chunks. After each song the user is prompted to
    move on to the next song, play the same one again or exit.

    Exits with status 1 if no input is named or the input contains no
    songs. Exits with status 0 when the user quits or asks for the next
    song after the last one has been played.

    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-g', '--gap', dest="gap", action="store", type="float", help="time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value", default=0.0)
    parser.add_option('-p', '--print', dest="print_events", action="store_true", help="print out all events for each chunk")
    parser.add_option('--pno', '--print-note-ons', dest="print_note_ons", action="store_true", help="print only note-on events")
    parser.add_option('--fopt', dest="file_options", action="store", help="options for file loading. Use '--fopt help' to see available options")
    options, arguments = parse_args_with_config(parser)

    # Without this check a missing argument surfaces as a bare IndexError
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify an input file"
        sys.exit(1)
    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn
    input_getter = iter(input_data)
    try:
        segmidi = next(input_getter)
    except StopIteration:
        # Nothing to play at all
        print >>sys.stderr, "No songs found in the input"
        sys.exit(1)

    while True:
        print("###############################")
        print("Playing '%s'" % segmidi.name)
        print("%s-beat chunks with a %d-tick offset\n" %
              (segmidi.time_unit, segmidi.tick_offset))
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print("Playing chunk %d: %d events" % (i, len(strm.trackpool)))
                if options.print_events:
                    print("\n".join("  %s" % ev
                                    for ev in sorted(strm.trackpool)))
                elif options.print_note_ons:
                    print("\n".join("  %s" % ev
                                    for ev in sorted(strm.trackpool)
                                    if type(ev) is NoteOnEvent))
                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            # Deliberate: an interrupt only abandons the rest of this song
            # and drops through to the prompt below
            pass

        print("Continue to next song (<enter>); exit (x); play again (p)")
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "":
            # Move to next
            try:
                segmidi = next(input_getter)
            except StopIteration:
                # The last song has been played: finish cleanly instead of
                # letting StopIteration escape as a traceback
                print("No more songs to play")
                sys.exit(0)
        elif command != "p":
            print("Unknown command: %s" % command)
            print("Playing again...")
        # "p" and unknown commands both loop round and replay the same song
예제 #25
0
def main():
    """
    Command-line entry point: labels a midi file using a trained Raphsto
    model and prints the labels.

    Takes a model name and a midi filename as positional arguments. The
    model class is looked up in C{MODEL_TYPES} by the C{--model-type}
    option and the named model is loaded from it. The midi file is wrapped
    in a C{MidiHandler} and passed to the model's C{label} method, whose
    result is treated as a sequence of (state, time) pairs. One line is
    printed per pair, in a format selected by C{--chords}, C{--analysis}
    or C{--raphsto} (a combined format if none is given). With
    C{--realize}, the labelling is also written out as a midi file.

    Exits with status 1 if fewer than two arguments are given or the model
    type is unknown.

    """
    parser = OptionParser(
        usage="%prog [options] <model-name> <midi-file>",
        description="Assigns harmonic labels to a midi file using a trained "
                    "Raphsto model")

    # (flags, keyword settings) for every option, in help-text order
    option_specs = [
        (('-t', '--time-unit'), {
            'dest': "time_unit",
            'action': "store",
            'type': "float",
            'help': "number of beats to take as the basic unit (chunk size) for labelling",
            'default': 2,
        }),
        (('-o', '--tick-offset'), {
            'dest': "tick_offset",
            'action': "store",
            'type': "int",
            'help': "time in midi ticks at which to start labelling",
            'default': 0,
        }),
        (('-d', '--remove-drums'), {
            'dest': "remove_drums",
            'action': "store_true",
            'help': "ignores any channel 10 events in the midi file",
            'default': False,
        }),
        (('-c', '--chords'), {
            'dest': "chords",
            'action': "store_true",
            'help': "displays just chord roots instead of full analysis (default: both)",
        }),
        (('-a', '--analysis'), {
            'dest': "analysis",
            'action': "store_true",
            'help': "displays a full analysis without reducing to chord roots (default: both)",
        }),
        (('-r', '--realize'), {
            'dest': "realize",
            'action': "store",
            'help': "realize the chord sequence as a midi file (very basic and horrible realization)",
        }),
        (('--rands', '--raphsto'), {
            'dest': "raphsto",
            'action': "store_true",
            'help': "displays analysis in the style of the annotations added to MIDI files by the original implementation",
        }),
        (('--lyrics',), {
            'dest': "lyrics",
            'action': "store_true",
            'help': "include the chord labels as lyric events in the midi file",
            'default': False,
        }),
        (('-m', '--model-type'), {
            'dest': "model_type",
            'action': "store",
            'help': "select a model type: one of %s (default: standard)" %
                    ", ".join(MODEL_TYPES.keys()),
            'default': "standard",
        }),
    ]
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input midi file as arguments"
        sys.exit(1)
    model_name = arguments[0]
    filename = os.path.abspath(arguments[1])

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(MODEL_TYPES)
        sys.exit(1)

    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    mid = read_midifile(filename)
    # Ticks per labelling unit, used to convert tick times to unit numbers
    # (presumably mid.resolution is ticks per beat -- TODO confirm)
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(mid,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset,
                          remove_drums=options.remove_drums)
    # Decode using the model to get a list of (state, time) pairs
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Build one output line per state change in the requested format
    if options.chords:
        lines = ["%s (bar %d)" % (format_state_as_chord(state), tick / bar)
                 for state, tick in state_changes]
    elif options.analysis:
        lines = ["%s (bar %d)" % (format_state(state), tick / bar)
                 for state, tick in state_changes]
    elif options.raphsto:
        lines = [format_state_as_raphsto(state, (tick / bar))
                 for state, tick in state_changes]
    else:
        lines = ["%s%s(bar %d)" % (format_state(state).ljust(15),
                                   format_state_as_chord(state).ljust(7),
                                   tick / bar)
                 for state, tick in state_changes]
    print("\n".join(lines))

    if options.realize is None:
        return
    # Realize as a midi file
    realizer = ChordSequenceRealizer(states,
                                     resolution=mid.resolution,
                                     times=times,
                                     chord_length=options.time_unit,
                                     text_events=options.lyrics)
    realized = realizer.generate(overlay=mid, offset=options.tick_offset)
    write_midifile(realized, options.realize)
예제 #26
0
def main():
    """
    Command-line entry point for Raphsto chord labelling of a midi file.

    Positional arguments are a model name and a midi file. The model class
    comes from C{MODEL_TYPES} (selected with C{--model-type}); the loaded
    model's C{label} method is applied to a C{MidiHandler} built from the
    midi file, and each (state, time) pair of the result is printed on its
    own line. C{--chords}, C{--analysis} and C{--raphsto} select the line
    format, in that order of precedence. C{--realize} additionally writes
    the labelling out as a midi file.

    Exits with status 1 when arguments are missing or the model type is
    not in C{MODEL_TYPES}.

    """
    parser = OptionParser(
        usage="%prog [options] <model-name> <midi-file>",
        description="Assigns harmonic labels to a midi file using a trained Raphsto model",
    )
    add = parser.add_option
    add("-t", "--time-unit", dest="time_unit", action="store", type="float", default=2,
        help="number of beats to take as the basic unit (chunk size) for labelling")
    add("-o", "--tick-offset", dest="tick_offset", action="store", type="int", default=0,
        help="time in midi ticks at which to start labelling")
    add("-d", "--remove-drums", dest="remove_drums", action="store_true", default=False,
        help="ignores any channel 10 events in the midi file")
    add("-c", "--chords", dest="chords", action="store_true",
        help="displays just chord roots instead of full analysis (default: both)")
    add("-a", "--analysis", dest="analysis", action="store_true",
        help="displays a full analysis without reducing to chord roots (default: both)")
    add("-r", "--realize", dest="realize", action="store",
        help="realize the chord sequence as a midi file (very basic and horrible realization)")
    add("--rands", "--raphsto", dest="raphsto", action="store_true",
        help="displays analysis in the style of the annotations added to MIDI files by the original implementation")
    add("--lyrics", dest="lyrics", action="store_true", default=False,
        help="include the chord labels as lyric events in the midi file")
    known_types = ", ".join(MODEL_TYPES.keys())
    add("-m", "--model-type", dest="model_type", action="store", default="standard",
        help="select a model type: one of %s (default: standard)" % known_types)
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input midi file as arguments"
        sys.exit(1)
    model_name, midi_path = arguments[0], arguments[1]
    filename = os.path.abspath(midi_path)

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    # Load the model
    model = model_cls.load_model(model_name)

    mid = read_midifile(filename)
    # Divisor that turns a tick time into the number printed as "bar"
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(
        mid,
        time_unit=options.time_unit,
        tick_offset=options.tick_offset,
        remove_drums=options.remove_drums,
    )
    # Decode using the model to get a list of states
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Pick the function that renders a single (state, tick) pair
    if options.chords:
        def render(state, tick):
            return "%s (bar %d)" % (format_state_as_chord(state), tick / bar)
    elif options.analysis:
        def render(state, tick):
            return "%s (bar %d)" % (format_state(state), tick / bar)
    elif options.raphsto:
        def render(state, tick):
            return format_state_as_raphsto(state, (tick / bar))
    else:
        def render(state, tick):
            return "%s%s(bar %d)" % (
                format_state(state).ljust(15),
                format_state_as_chord(state).ljust(7),
                tick / bar,
            )
    print("\n".join(render(state, tick) for state, tick in state_changes))

    if options.realize is not None:
        # Realize as a midi file
        realizer = ChordSequenceRealizer(
            states,
            resolution=mid.resolution,
            times=times,
            chord_length=options.time_unit,
            text_events=options.lyrics,
        )
        write_midifile(
            realizer.generate(overlay=mid, offset=options.tick_offset),
            options.realize,
        )
예제 #27
0
def prepare_evaluation_options(usage=None, description=None,
        optparse_options=None, check_args=None, optparse_groups=None):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}).
    
    Builds an option parser (with the standard input/partitioning options,
    C{--debug} and any caller-supplied options), parses the command line,
    initializes logging, loads the sequences from the input file and works
    out which sequences go with which model name.
    
    Prints a message to stderr and exits with status 1 if the C{--sequence}
    index does not exist or the C{--partition} value is malformed or names
    a partition that does not exist.
    
    @todo: This is not used any more. Remove it, after checking it's definitely 
        not used.
    
    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional 
        options to the optparse parser. C{None} (the default) means no 
        additional options.
    @type check_args: function
    @param check_args: function to take the command-line arguments and 
        check them. Must return a tuple of (1) the model name (or model 
        basename) that will be used in the partition model names and 
        (2) the input filename to get sequences from.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the 
        optparse option parser. The first of each pair is a tuple of 
        args to C{OptionGroup}'s init (excluding the first). 
        The second is a list of options 
        each formatted as C{optparse_options}. If a group with the same 
        title already exists, the options are added to that group. 
        C{None} (the default) means no additional groups.
        
    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) sequence ids: the partitioned id lists 
        with C{--partitions}, the ids of the selected partition with 
        C{--partition}, a single-item list holding the sequence's id with 
        C{--sequence} and C{[None]} otherwise; (3) optparse options; 
        (4) optparse arguments.
    @raise ValueError: if C{check_args} is C{None}.
    
    """
    import sys
    # Needed for logging.DEBUG below. Imported here like the function's other
    # dependencies so that --debug does not rely on a module-level import
    import logging
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition
    
    # Avoid shared mutable default arguments
    if optparse_options is None:
        optparse_options = []
    if optparse_groups is None:
        optparse_groups = []
    
    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input", "Input data and partitioning for evaluation")
    group.add_option("-s", "--sequence", dest="sequence", action="store", help="limit the evaluation to just one sequence, with the given index in the input file")
    group.add_option("--partition", dest="partition", action="store", help="restrict to only one partition of the data. Specify as i/n, where i is the partition number and n the total number of partitions.")
    group.add_option("-p", "--partitions", dest="partitions", type="int", action="store", help="test on all n partitions of the data, using a different model for each. Will look for a model <NAME>i, where <NAME> is the given model name and i the partition number.")
    parser.add_option_group(group)
    
    parser.add_option("--debug", dest="debug", action="store_true", help="show debugging output")
    
    # Add the options according to their specs
    for args,kwargs in optparse_options:
        parser.add_option(*args, **kwargs)
        
    # Add groups and their options
    for group_args,group_options in optparse_groups:
        # Check whether the group already exists
        same_titles = [g for g in parser.option_groups if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args,kwargs in group_options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)
    
    if check_args is None:
        raise ValueError("could not check arguments and get model "
            "name. check_args must not be None")
    model_name,input_filename = check_args(arguments)
        
    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()
        
    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)
        
    def _get_seq_by_index(index):
        # Look up one sequence, exiting with an error if there's no such index
        seq = seqs.sequence_by_index(index)
        if seq is None:
            print >>sys.stderr, "There are only %d sequences" % len(seqs)
            sys.exit(1)
        return seq
    
    ################ Data partitioning ####################
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model name for each
        total_parts = options.partitions
        print >>sys.stderr, "Cross validation: dividing test data into %d partitions" % total_parts
        partitions = [(part,"%s%d" % (model_name,i), i) for i,part in enumerate(partition(seqs.sequences, total_parts))]
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the argument to get two integers. A ValueError covers 
        # both non-integer parts and the wrong number of parts
        try:
            parti,total_parts = [int(val) for val in options.partition.split("/")]
        except ValueError:
            print >>sys.stderr, "Partition must be given as i/n, where i and n are integers: got '%s'" % options.partition
            sys.exit(1)
        print >>sys.stderr, "Restricting sequences to %d-way partition %d" % (total_parts,parti)
        id_parts = partition(seqs.ids, total_parts)
        named_parts = [(part,"%s%d" % (model_name,i), i) for i,part in enumerate(partition(seqs.sequences, total_parts))]
        try:
            # Get a list of sequence indices to restrict our set to
            part_ids = id_parts[parti]
            partitions = [ named_parts[parti] ]
        except IndexError:
            print >>sys.stderr, "There is no partition %d in a %d-way partitioning" % (parti,total_parts)
            sys.exit(1)
    elif options.sequence is not None:
        # Just select one sequence
        seq = _get_seq_by_index(int(options.sequence))
        partitions = [( [seq], model_name, 0 )]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name,0)]
        part_ids = [None]
    
    # NOTE(review): part_ids is one entry per partition only in the 
    # --partitions case; in the other cases it doesn't run parallel to 
    # partitions. Left as it is in case any caller depends on it
    return partitions,part_ids,options,arguments
예제 #28
0
def main():
    """
    Command-line entry point: trains Raphsto chord labelling models.
    
    Takes a model name and an input data file as positional arguments. 
    The input file is read with C{InputSourceFile} and an emission stream 
    is taken from each of its midi handlers. The streams are optionally 
    truncated (C{--max-length}), split into shorter sequences 
    (C{--split-length}) and filtered by length (C{--min-length}). A model 
    of the type selected by C{--model-type} is then initialized and 
    trained on the data, or one model per partition if C{--partitions} is 
    given.
    
    Exits with status 1 if arguments are missing, C{--split-length} is 
    not positive, the model type is unknown or the chord set does not 
    exist. Exits with status 0 after printing help for C{--opts help} or 
    C{--chord-set help}.
    
    """
    usage = "%prog [options] <model_name> <input-file>"
    description = "Trains a model for the RaphSto chord labelling "\
        "algorithm on a file that contains a list of midi files with "\
        "training options"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    parser.add_option('--proc', '--processes', dest="processes", action="store", type="int", help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)", default=1)
    parser.add_option('--max-length', dest="max_length", action="store", type="int", help="limits the length of the training midi sequences in chunks")
    parser.add_option('--split-length', dest="split_length", action="store", type="int", help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot")
    parser.add_option('--min-length', dest="min_length", action="store", type="int", help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence")
    parser.add_option('--progress-out', dest="progress_out", action="store", help="output logging info to a file instead of the command line")
    parser.add_option('--init-model', dest="init_model", action="store", help="initialize the model using parameters from an already trained model")
    parser.add_option('--init-ctrans', dest="init_ctrans", action="store", help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability")
    parser.add_option('--chord-set', dest="chord_set", action="store", help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used")
    parser.add_option('-m', '--model-type', dest="model_type", action="store", help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()), default="standard")
    options, arguments = parse_args_with_config(parser)
    
    if options.opts == "help":
        print(options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs"))
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)
    
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # A split length of zero or less would never shorten the sequence in 
    # the splitting loop below, so it would run forever
    if options.split_length is not None and options.split_length < 1:
        print >>sys.stderr, "--split-length must be a positive number of chunks"
        sys.exit(1)
    
    print >>sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(' ')
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >>sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >>sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")
        
    if options.model_type not in MODEL_TYPES:
        print >>sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]
    
    if options.chord_set == "help":
        print("Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys()))
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >>sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)
    
    
    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i,mh in enumerate(handlers):
        logger.info("%s: %s" % (i,midis.inputs[i][0]))
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % \
                                    (i,options.max_length,len(emissions)))
            emissions = emissions[:options.max_length]
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer "\
                                "than %d chunks" % (i,options.split_length))
            # Split up the sequence if it's too long: peel full-length 
            # pieces off the front until what's left fits
            while len(emissions) > options.split_length:
                training_data.append(emissions[:options.split_length])
                emissions = emissions[options.split_length:]
        # Whatever is left (the whole sequence if it wasn't split)
        training_data.append(emissions)
    
    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info("Threw away %d short sequences (below %d chunks)" % \
                    ((before_chuck-len(training_data)), options.min_length))
    
    logger.info("Training on %d sequences. Lengths: %s" % \
                    (len(training_data), 
                     ", ".join(str(len(seq)) for seq in training_data)))
    
    if options.partitions is not None:
        # One model per partition, named <model_name><partition number>
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name,num),data) for num,data in enumerate(parts)]
    else:
        models = [(model_name,training_data)]
        
    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes
    
    for part_name,data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % \
                options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, \
                model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: "\
                "%s, %s, %s" % ctype_params)
            init_kwargs = { 'model_name' : part_name }
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs['chord_set'] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)
            
            # Initialize the chord transition probabilities if given
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s" \
                    % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >>sys.stderr, "Training terminating at %s" % datetime.now().isoformat(' ')
예제 #29
0
def main():
    """
    Command-line entry point: an interactive shell for querying the
    probabilities of a trained ngram tagger model.

    The single positional argument is the name of the model to load with
    C{NgramTaggerModel.load_model}. Queries are then read from
    C{input_iterator} one at a time:

     - C{states}: print the model's state labels
     - C{ems}: print the model's emission domain
     - C{t <state> <state-1> ...}: print a transition probability; the
       number of states must equal the model's order and C{None} may be
       given as a state
     - C{e <chord> <state>}: print an emission probability

    Errors raised while handling a query are reported and the shell carries
    on. Exits with status 1 if no model name is given.

    """
    optparser = OptionParser(usage="%prog [<options>] <model-name>",
                             description="Queries an ngram model interactively")
    # Read in command line options and args
    options, arguments = parse_args_with_config(optparser)

    if not arguments:
        print("Specify a model name")
        sys.exit(1)
    model_name = arguments[0]

    # Load the ngram model
    tagger_model = NgramTaggerModel.load_model(model_name)
    model = tagger_model.model

    input_getter = input_iterator(">> ")
    # Load the shell history if possible
    try:
        readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE)
    except IOError:
        # No history file found. No problem
        pass

    banner = (
        "N-gram model query",
        "Loaded %s" % model_name,
        "",
        "Transition:      t <state> <state-1> ... <state-n>",
        "Emission:        e <chord> <state>",
        "State domain:    states",
        "Emission domain: ems",
    )
    for banner_line in banner:
        print(banner_line)

    def _check_state(s):
        # None is accepted as well as the labels in the model's label domain
        if s in model.label_dom + [None]:
            return
        raise QueryError("invalid state label: %s" % s)

    for raw_query in input_getter:
        query = raw_query.rstrip("\n").strip()
        if not query:
            continue
        try:
            # The longer keywords are tested first, since "states" also
            # starts with nothing that clashes but "ems" starts with "e"
            if query.startswith("states"):
                print(", ".join(model.label_dom))
            elif query.startswith("ems"):
                print(", ".join(model.emission_dom))
            elif query.startswith("t"):
                # Transition prob query
                ngram = query.split()[1:]
                if len(ngram) != model.order:
                    print("Ngram must have length %d" % model.order)
                else:
                    # The literal text "None" stands for the None state
                    ngram = [None if label == "None" else label
                             for label in ngram]
                    # Verify all these states
                    for label in ngram:
                        _check_state(label)
                    # Get the transition probability
                    prob = model.transition_probability_debug(*ngram)
                    context = ", ".join(
                        "Q(i-%d) = %s" % (back, label)
                        for back, label in enumerate(ngram[1:], 1))
                    print("P(Qi = %s | %s) = %f" % (ngram[0], context, prob))
            elif query.startswith("e"):
                # Emission prob query
                em_state = query.split()[1:]
                if len(em_state) != 2:
                    print("Emission query must consist of a chord and a state")
                else:
                    em, state = em_state
                    # Check the state label's valid
                    _check_state(state)
                    # Get the emission probability
                    prob = model.emission_probability(em, state)
                    # Print out the probability
                    print("P(Oi = %s | Qi = %s) = %f" % (em, state, prob))
            else:
                print("Invalid query: %s" % query)
        except QueryError as err:
            print("Check your query: %s" % err)
        except Exception as err:
            print("Error processing query: %s" % err)
예제 #30
0
def main():
    usage = "%prog [options] <model_name> <input-file>"
    description = (
        "Trains a model for the RaphSto chord labelling "
        "algorithm on a file that contains a list of midi files with "
        "training options"
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p",
        "--partitions",
        dest="partitions",
        action="store",
        type="int",
        help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.",
    )
    parser.add_option(
        "--opts",
        dest="opts",
        action="store",
        help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.",
    )
    parser.add_option(
        "--proc",
        "--processes",
        dest="processes",
        action="store",
        type="int",
        help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)",
        default=1,
    )
    parser.add_option(
        "--max-length",
        dest="max_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks",
    )
    parser.add_option(
        "--split-length",
        dest="split_length",
        action="store",
        type="int",
        help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot",
    )
    parser.add_option(
        "--min-length",
        dest="min_length",
        action="store",
        type="int",
        help="ignores any sequences under this number of chunks. This is useful with --split-length, which can leave very short sequences from the end of a split sequence",
    )
    parser.add_option(
        "--progress-out",
        dest="progress_out",
        action="store",
        help="output logging info to a file instead of the command line",
    )
    parser.add_option(
        "--init-model",
        dest="init_model",
        action="store",
        help="initialize the model using parameters from an already trained model",
    )
    parser.add_option(
        "--init-ctrans",
        dest="init_ctrans",
        action="store",
        help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability",
    )
    parser.add_option(
        "--chord-set",
        dest="chord_set",
        action="store",
        help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used",
    )
    parser.add_option(
        "-m",
        "--model-type",
        dest="model_type",
        action="store",
        help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()),
        default="standard",
    )
    options, arguments = parse_args_with_config(parser)

    if options.opts is not None and options.opts == "help":
        print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs")
        sys.exit(0)
    opts = ModuleOption.process_option_string(options.opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    print >> sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(" ")
    # Create a logger to output the progress of the training to stdout or a file
    if options.progress_out is not None:
        stdout = False
        logfile = options.progress_out
        print >> sys.stderr, "Outputing logging info to %s" % logfile
    else:
        stdout = True
        logfile = None
        print >> sys.stderr, "Outputing logging to stdout"
    logger = create_logger(name="raphsto_train", filename=logfile, stdout=stdout)
    logger.info("Raphael and Stoddard HMM model training")

    if options.model_type not in MODEL_TYPES:
        print >> sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES)
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]

    if options.chord_set == "help":
        print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys())
        sys.exit(0)
    elif options.chord_set is not None:
        # Check this chord set exists
        if options.chord_set not in constants.CHORD_SETS:
            print >> sys.stderr, "Chord set '%s' does not exist" % options.chord_set
            sys.exit(1)
        else:
            logger.info("Using chord set '%s'" % options.chord_set)

    # Read in the training data
    midis = InputSourceFile(filename)
    handlers = midis.get_handlers()
    logger.info("Reading in %d midi files..." % len(midis.inputs))
    training_data = []
    for i, mh in enumerate(handlers):
        logger.info("%s: %s" % (i, midis.inputs[i][0]))
        emissions = mh.get_emission_stream()[0]
        if options.max_length is not None and len(emissions) > options.max_length:
            logger.info("Truncating file %d to %d chunks (was %d)" % (i, options.max_length, len(emissions)))
            emissions = emissions[: options.max_length]
        if options.split_length is not None:
            logger.info("Splitting sequence %d into sequence no longer " "than %d chunks" % (i, options.split_length))
            # Split up the sequence if it's too long
            while len(emissions) > options.split_length:
                training_data.append(emissions[: options.split_length])
                emissions = emissions[options.split_length :]
        training_data.append(emissions)

    if options.min_length is not None:
        # Make sure there are no sequences under the minimum length
        # Just throw away any that are
        before_chuck = len(training_data)
        training_data = [seq for seq in training_data if len(seq) >= options.min_length]
        if len(training_data) != before_chuck:
            logger.info(
                "Threw away %d short sequences (below %d chunks)"
                % ((before_chuck - len(training_data)), options.min_length)
            )

    logger.info(
        "Training on %d sequences. Lengths: %s"
        % (len(training_data), ", ".join(str(len(seq)) for seq in training_data))
    )

    if options.partitions is not None:
        parts = holdout_partition(training_data, options.partitions)
        models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)]
    else:
        models = [(model_name, training_data)]

    # Number of processes to use
    if options.processes == -1:
        # Special value: means number of training sequences (one process per sequence)
        processes = len(training_data)
    else:
        processes = options.processes

    for part_name, data in models:
        # Instantiate a fresh model with this name
        logger.info("Training model '%s' on %d midis" % (part_name, len(data)))
        if options.init_model is not None:
            logger.info("Initializing using parameters from model '%s'" % options.init_model)
            # Load an already trained model as initialization
            model = model_cls.initialize_existing_model(options.init_model, model_name=part_name)
        else:
            # TODO: make these probs an option
            ctype_params = (0.5, 0.3, 0.2)
            logger.info("Initializing to naive chord types using parameters: " "%s, %s, %s" % ctype_params)
            init_kwargs = {"model_name": part_name}
            if options.chord_set is not None:
                # Specify a chord set for the model
                init_kwargs["chord_set"] = options.chord_set
            model = model_cls.initialize_chord_types(ctype_params, **init_kwargs)

            # Initialize the chord transition probabilities if given
            if options.init_ctrans is not None:
                logger.info("Initializing chord transition distribution to %s" % options.init_ctrans)
                model.set_chord_transition_probabilities(options.init_ctrans)
        # Retrain it with the loaded data
        trainer = model_cls.get_trainer()(model, options=opts)
        trainer.train(data, logger=logger, processes=processes, save_intermediate=True)
    print >> sys.stderr, "Training terminating at %s" % datetime.now().isoformat(" ")
예제 #31
0
def main():
    usage = "%prog [options] <midi-file>"
    description = "Divides a midi file into chunks, with a given size and "\
        "offset, and plays "\
        "the chunks consecutively, with a gap between each."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-t',
        '--time-unit',
        dest="time_unit",
        action="store",
        type="float",
        help=
        "size of chunks in crotchet beats (according to the midi file's resolution)",
        default=4)
    parser.add_option('-o',
                      '--tick-offset',
                      dest="tick_offset",
                      action="store",
                      type="int",
                      help="offset of the first chunk in midi ticks",
                      default=0)
    parser.add_option(
        '-g',
        '--gap',
        dest="gap",
        action="store",
        type="float",
        help=
        "time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value",
        default=0.0)
    parser.add_option('-p',
                      '--print',
                      dest="print_events",
                      action="store_true",
                      help="print out all events for each chunk")
    parser.add_option('--pno',
                      '--print-note-ons',
                      dest="print_note_ons",
                      action="store_true",
                      help="print only note-on events")
    parser.add_option(
        '--force-res',
        dest="force_res",
        action="store",
        type="int",
        help=
        "force the midi file's resolution to be the given value, rather than using that read from the file"
    )
    parser.add_option('-s',
                      '--start',
                      dest="start",
                      action="store",
                      type="int",
                      help="chunk number to start at",
                      default=0)
    options, arguments = parse_args_with_config(parser)

    filename = arguments[0]

    # Load up the input midi file
    infile = read_midifile(filename, force_resolution=options.force_res)
    handler = MidiHandler(infile,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset)
    slices = handler.get_slices()

    # Start at the requested chunk
    slices = slices[options.start:]

    print "Playing %d-beat chunks with a %d-tick offset" % (
        options.time_unit, options.tick_offset)
    if options.start > 0:
        print "Start from chunk %d" % options.start
    print "Total chunks: %d" % len(slices)
    print "Ctrl+C to exit"
    print

    try:
        for i, slc in enumerate(slices):
            strm = slc.to_event_stream(cancel_playing=True)
            print "Playing chunk %d: %d-%d (%d events)" % (
                i, slc.start, slc.end, len(strm.trackpool))
            if options.print_events:
                print "\n".join("  %s" % ev for ev in sorted(strm.trackpool))
            elif options.print_note_ons:
                print "\n".join("  %s" % ev for ev in sorted(strm.trackpool) \
                                                    if type(ev) is NoteOnEvent)

            play_stream(strm, block=True)
            if options.gap > 0.0:
                print "  Waiting %s seconds..." % options.gap
                time.sleep(options.gap)
    except KeyboardInterrupt:
        print "Exiting"
예제 #32
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options")
    parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr")
    parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)
    
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level = log_level,
                  name = "training",
                  stderr = True)
    
    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
        
    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS, 
                                            intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
                    ModuleOption.process_option_string(options.training_opts),
                    PcfgModel.TRAINING_OPTIONS)
    
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >>sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name
    
    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]
    
    if len(arguments) < 2:
        print >>sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])
    
    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]
        
    for dataset,(parti,part_model) in zip(datasets,parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, 
                                logger=logger)
        model.save()
        print "Trained model", part_model
예제 #33
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="append", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(options.training_opts)
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)
    
    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name,None)]
    
    for part_name,chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
        
        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data, part_name, 
                                     logger=logger, 
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
예제 #34
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="append",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(
            options.training_opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)

    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name, None)]

    for part_name, chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
예제 #35
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='segmidi')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Labeling options
    parser.add_option(
        "--labeler-options",
        "--lopt",
        dest="labeler_options",
        action="append",
        help=
        "options for the labeler. Type '--lopt help' for a list of available options."
    )
    parser.add_option(
        "--no-key",
        "--nk",
        dest="no_key",
        action="store_true",
        help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option(
        "--single",
        "-1",
        dest="single",
        action="store_true",
        help=
        "show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)"
    )
    parser.add_option(
        '-r',
        '--realize',
        dest="realize",
        action="store",
        help="realize the chord sequence as a midi file, overlaid on the input"
    )
    parser.add_option(
        '--chords-only',
        dest="chords_only",
        action="store_true",
        help=
        "only realize the chords: don't overlay on the input midi (only works with -r)"
    )
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and "help" in options.labeler_options:
        print options_help_text(HPChordLabeler.LABELING_OPTIONS,
                                intro="Options for HP chord labeler")
        sys.exit(0)

    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input "\
            "(MIDI) data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True

    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    bulk = not is_bulk_type(type(input_data))
    if bulk:
        input_data = [input_data]

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print "Read midi data in %d segments" % len(data)

        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger
        labs = model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print ", ".join(["%s" % timelabs[0][0] for timelabs in labels])
        else:
            # Print out the labels for each timestep
            for time, timelabs in enumerate(labels):
                print "%d: %s" % (time, ", ".join([
                    "%s (%.2e)" % (label, prob) for (label, prob) in timelabs
                ]))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print "Realizing output chord sequence"
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream,
                                       offset=data.tick_offset)

            if bulk:
                filename = "%s-%d" % (options.realize, i)
            else:
                filename = options.realize
            write_midifile(stream, filename)
예제 #36
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]
    
    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)
    
    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >>sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts), 
                            model_cls.TRAINING_OPTIONS)
    
    # Get the rest of the args
    if len(arguments) < 3:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name,input_data)]
    
    for part_name,seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
            
        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
예제 #37
0
def main():
    set_proc_title("jazzparser")
    ########################################################
    usage = "jazzparser [<options>]"
    description = "The main parser interface for the Jazz Parser"
    ## Process the input options
    optparser = OptionParser(usage=usage, description=description)
    ###
    # File input options
    group = OptionGroup(optparser, "Input", "Input type and location")
    optparser.add_option_group(group)
    group.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    group.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    group.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    group.add_option("--index", "--indices", dest="input_index", action="store", help="select individual inputs to process. Specify as a comma-separated list of indices. All inputs are loaded as usual, but only the ith input is processed, for each i in the list")
    group.add_option("--only-load", dest="only_load", action="store_true", help="don't do anything with the inputs, just load and list them. Handy for checking the inputs load and getting their indices")
    group.add_option("--partitions", dest="partitions", action="store", type="int", help="divide the input data into this number of partitions and use a different set of models for each. For any parser, tagger and backoff that takes a 'model' argument, the partition number will be appended to the given value")
    group.add_option("--seq-parts", "--sequence-partitions", dest="sequence_partitions", action="store", help="use a chord sequence index to partition the inputs. Input type (bulk) must support association of the inputs with chord sequences by id. Sequences in the given sequence index file are partitioned n ways (--partitions) and the inputs are processed according to their associated sequence.")
    group.add_option("--continue", "--skip-done", dest="skip_done", action="store_true", help="skip any inputs for which a readable results file already exists. This is useful for continuing a bulk job that was stopped in the middle")
    ###
    group = OptionGroup(optparser, "Parser", "Parser, supertagger and backoff parser")
    optparser.add_option_group(group)
    group.add_option("-d", "--derivations", dest="derivations", action="store_true", help="keep derivation logs during parse.")
    group.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # Parser options
    group.add_option("-p", "--parser", dest="parser", action="store", help="use the named parser algorithm instead of the default. Use '-p help' to see the list of available parsers. Default: %s" % settings.DEFAULT_PARSER, default=settings.DEFAULT_PARSER)
    group.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser. Type '--popt help', using '--parser <name>' to select a parser module, to get a list of options.")
    # Tagger options
    group.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    group.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Backoff options
    group.add_option("-b", "--backoff", "--noparse", dest="backoff", action="store", help="use the named backoff model as a backoff if the parser produces no results")
    group.add_option("--bopt", "--backoff-options", "--backoff-options", "--npo", dest="backoff_opts", action="append", help="specify options for the  backoff model. Type '--npo help', using '--backoff <name>' to select a backoff modules, to get a list of options.")
    ###
    # Multiprocessing options
    group = OptionGroup(optparser, "Multiprocessing")
    optparser.add_option_group(group)
    group.add_option("--processes", dest="processes", action="store", type="int", help="number of processes to create to perform parses in parallel. Default: 1, i.e. no process pool. Use -1 to create a process for every input", default=1)
    ###
    # Output options
    group = OptionGroup(optparser, "Output")
    optparser.add_option_group(group)
    group.add_option("--output", dest="output", action="store", help="directory name to output parse results to. A filename specific to the individual input will be appended to this")
    group.add_option("--topn", dest="topn", action="store", type="int", help="limit the number of final results to store in the output file to the top n by probability. By default, stores all")
    group.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.")
    group.add_option("-a", "--atomic-results", dest="atoms_only", action="store_true", help="only include atomic categories in the results.")
    group.add_option("-l", "--latex", dest="latex", action="store_true", help="output all results as Latex source. Used to produce a whole Latex document, but doesn't any more")
    group.add_option("--all-times", dest="all_times", action="store_true", help="display all timing information on semantics in output.")
    group.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    group.add_option("--time", dest="time", action="store_true", help="time how long the parse takes and output with the results.")
    group.add_option("--no-results", dest="no_results", action="store_true", help="don't print out the parse results at the end. Obviously you'll want to make sure they're going to a file (--output). This is useful for bulk parse jobs, where the results produce a lot of unnecessary output")
    group.add_option("--no-progress", dest="no_progress", action="store_true", help="don't output the summary of completed sequences after each one finishes")
    ###
    # Output analysis and harmonical
    group = OptionGroup(optparser, "Output processing", "Output analysis and harmonical")
    optparser.add_option_group(group)
    group.add_option("--harmonical", dest="harmonical", action="store", help="use the harmonical to play the chords justly intoned according to the top result and output to a wave file.")
    group.add_option("--enharmonical", dest="enharmonical", action="store", help="use the harmonical to play the chords in equal temperament and output to a wave file.")
    group.add_option("--midi", dest="midi", action="store_true", help="generate MIDI files from the harmonical, instead of wave files.")
    group.add_option("--tempo", dest="tempo", action="store", type=int, help="tempo to use for the generated music (see --harmonical/--enharmonical). Default: 120", default=120)
    group.add_option("--lh-analysis", dest="lh_analysis", action="store_true", help="output the Longuet-Higgins space interpretation of the semantics for each result.")
    group.add_option("--lh-coordinates", dest="lh_coord", action="store_true", help="like lh-analysis, but displays the coordinates of the points instead of their names.")
    ###
    # Logging options
    group = OptionGroup(optparser, "Logging")
    optparser.add_option_group(group)
    group.add_option("--long-progress", dest="long_progress", action="store_true", help="print a summary of the chart so far after each chord/word has been processed.")
    group.add_option("--progress", "--short-progress", dest="short_progress", action="store_true", help="print a small amount of information out during parsing to indicate progress.")
    group.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    ###
    # Shell options
    group = OptionGroup(optparser, "Shell", "Interactive shell for inspecting results and parser state")
    optparser.add_option_group(group)
    group.add_option("-i", "--interactive", dest="interactive", action="store_true", help="enter interactive mode after parsing.")
    group.add_option("--error", dest="error_shell", action="store_true", help="catch any errors, report them and then enter the interactive shell. This also catches keyboard interrupts, so you can use it to halt parsing and enter the shell.")
    
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    
    # Get log level option first, so we can start using the logger
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    # Set up a logger
    init_logging(log_level)
    
    if options.latex:
        settings.OPTIONS.OUTPUT_LATEX = True
    
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Check the grammar actually exists
    grammar_names = get_grammar_names()
    if options.grammar is not None and options.grammar not in grammar_names:
        # This is not a valid grammar name
        logger.error("The grammar '%s' does not exist. Possible "\
            "grammars are: %s." % (options.grammar, ", ".join(grammar_names)))
        return 1
    grammar = get_grammar(options.grammar)
        
    ######## Parser ########
    # Load the requested parser
    from jazzparser.parsers import PARSERS
    if options.parser.lower() == "help":
        print "Available parsers are: %s" % ", ".join(PARSERS)
        return 0
    try:
        parser_cls = get_parser(options.parser)
    except ParserLoadError:
        logger.error("The parser '%s' could not be loaded. Possible "\
            "parsers are: %s" % (options.parser, ", ".join(PARSERS)))
        return 1
        
    # Get parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this parser's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(parser_cls.PARSER_OPTIONS, intro="Available options for selected parser")
            return 0
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        parser_cls.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        return 1