def main(): usage = "%prog [options] <in-file>" description = "Print out stats about the notes in a MIDI file" parser = OptionParser(usage=usage, description=description) parser.add_option('-k', '--key-profile', dest="key_profile", action="store", type="int", help="output a graph of the key profile for the given key as a gnuplot script") options, arguments = parse_args_with_config(parser) if len(arguments) == 0: print "No input MIDI file given" sys.exit(1) filename = arguments[0] # Load the midi file midi = read_midifile(filename) print "Midi file type %d" % midi.format print "Resolution: %d" % midi.resolution print "%d notes" % len(note_ons(midi)) # Get rid of drums midi = simplify(midi, remove_drums=True) notes = note_ons(midi) print "%d non-drum notes" % len(notes) # Analyse the note content pcs = dict([(i,0) for i in range(12)]) for note in notes: pcs[note.pitch % 12] += 1 note_names = dict([ (0, "C"), (1, "C#"), (2, "D"), (3, "D#"), (4, "E"), (5, "F"), (6, "F#"), (7, "G"), (8, "G#"), (9, "A"), (10, "A#"), (11, "B") ]) # Print the notes for pc, count in reversed(sorted(pcs.items(), key=lambda x:x[1])): print "%s: %d" % (note_names[pc], count) if options.key_profile is not None: kp_output_file = "key_profile" pc_names = ["1", "#1/b2", "2", "#2/b3", "3", "4", "#4/b5", "5", "#5/b6", "6", "#6/b7", "7"] # Output the pitch counts key = options.key_profile # Get the pc frequencies pc_freq = [float(pcs[(key+p)%12])/sum(pcs.values()) for p in range(12)] # Output them to a CSV data = "\n".join("%d\t%s\t%f" % (i,name,freq) for (name,freq,i) in zip(pc_names,pc_freq,range(12))) with open("%s.csv" % kp_output_file, 'w') as f: f.write(data) # Output the Gnuplot script gnuplot = """\ set style data lines set nokey set xrange [-1:13] set terminal pdf monochrome set output "key_profile.pdf" set xlabel "Pitch class" plot "key_profile.csv" using 1:3:xticlabel(2) """ with open("%s.p" % kp_output_file, 'w') as f: f.write(gnuplot) # Run Gnuplot call(["gnuplot", "%s.p" % 
kp_output_file]) print "Gnuplot plot output to %s.p and %s.pdf" % (kp_output_file,kp_output_file)
def main(): usage = "%prog <model-name>" description = "Generate chord sequences from a PCFG model" parser = OptionParser(usage=usage, description=description) parser.add_option("-g", "--grammar", dest="grammar", action="store", \ help="use the named grammar instead of the default.") parser.add_option("-d", "--debug", dest="debug", action="store_true", \ help="output debugging information during generation") options, arguments = parse_args_with_config(parser) if options.debug: logger = create_plain_stderr_logger(log_level=logging.DEBUG) else: logger = create_plain_stderr_logger(log_level=logging.WARN) if len(arguments) < 1: print "Specify a model name" sys.exit(1) model_name = arguments[0] grammar = get_grammar(options.grammar) PcfgModel = grammar.formalism.PcfgModel # Load the trained model model = PcfgModel.load_model(model_name) sequence = model.generate(logger=logger) if sequence is None: print "Model did not generate a sequence" else: print sequence
def main():
    """
    Render a Harmonical input file as audio or MIDI.

    The first positional argument is the input file, loaded with
    HarmonicalInputFile.from_file. By default the rendered audio is played
    back; -o saves the result to a file instead, and -m renders MIDI data
    rather than audio (played back or saved in the same way).

    Progress messages go to stderr. Exits with status 1 if no input file
    is given.

    """
    usage = "%prog [options] <in-file>"
    description = "Play music using the Harmonical. This allows you to "\
        "play music specified precisely in the tonal space. By default, "\
        "plays back the input, but can also output to a file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-o', '--output', dest="outfile", action="store", help="output the result to a wave file instead of playing back.")
    parser.add_option('-m', '--midi', dest="midi", action="store_true", help="generate midi data, not audio. Depends on the input format supporting midi file generation.")
    options, arguments = parse_args_with_config(parser)

    # Fail with a usable message rather than an IndexError traceback
    if len(arguments) < 1:
        print >>sys.stderr, "You must specify an input file"
        sys.exit(1)
    filename = arguments[0]

    # Load up the input file
    infile = HarmonicalInputFile.from_file(filename)
    if options.midi:
        midi = infile.render_midi()

        if options.outfile is not None:
            # Output a midi file
            write_midifile(midi, options.outfile)
            print >>sys.stderr, "Saved midi data to %s" % options.outfile
        else:
            print >>sys.stderr, "Playing..."
            play_stream(midi, block=True)
    else:
        print >>sys.stderr, "Generating audio..."
        audio = infile.render()

        if options.outfile is not None:
            # Output to a file instead of playing
            save_wave_data(audio, options.outfile)
            print >>sys.stderr, "Saved data to %s" % options.outfile
        else:
            print >>sys.stderr, "Playing..."
            play_audio(audio, wait_for_end=True)
def main(): usage = "%prog [options] <input-midi> <output-filename>" description = "Cleans up a midi file by getting rid of a load of "\ "stuff that makes the music sound good, but isn't needed "\ "by our algorithms. See options for details." parser = OptionParser(usage=usage, description=description) parser.add_option('-d', '--remove-drums', dest="remove_drums", action="store_true", help="filter out drum tracks", default=False) parser.add_option('-p', '--pc', '--remove-program-change', dest="remove_pc", action="store_true", help="filter out all program change (instrument) events", default=False) parser.add_option('-x', '--remove-text', '--txt', dest="remove_text", action="store_true", help="filter out all text events of any type", default=False) parser.add_option('-o', '--one-track', dest="one_track", action="store_true", help="reduce everything down to one track", default=False) parser.add_option('-t', '--remove-tempo', dest="remove_tempo", action="store_true", help="remove all tempo events", default=False) parser.add_option('-c', '--remove-control', dest="remove_control", action="store_true", help="remove all control change events", default=False) parser.add_option('--ch', '--one-channel', dest="one_channel", action="store_true", help="use only one channel: every event occurs on channel 0", default=False) parser.add_option('--mc', '--remove-misc-control', dest="remove_misc_control", action="store_true", help="filter out a whole load of device control events: aftertouch, channel aftertouch, pitch wheel, sysex, port", default=False) parser.add_option('--rno', '--real-note-offs', dest="real_note_offs", action="store_true", help="replace 0-velocity note-ons with actual note-offs. 
Some midi files use one, some the other", default=False) parser.add_option('--remove-duplicates', dest="remove_duplicates", action="store_true", help="tidy up at the end to remove any duplicate notes", default=False) parser.add_option('-i', '--invert', dest="invert", action="store_true", help="inverts all options. I.e. applies all filters except those selected by the above options", default=False) parser.add_option('-r', '--remove-channels', dest="remove_channels", action="append", type="int", help="filter out all events of the numbered channel. Use multiple options to filter multiple channels at once") parser.add_option('--resolution', '--res', dest="resolution", action="store", type="int", help="change the resolution of the midi data from that read in from the file to that given") options, arguments = parse_args_with_config(parser) if len(arguments) < 2: print >>sys.stderr, "You must specify an input and output filename" sys.exit(1) in_filename = os.path.abspath(arguments[0]) out_filename = os.path.abspath(arguments[1]) # Read in the midi file mid = read_midifile(in_filename, force_resolution=options.resolution) # Build a dictionary of kwargs to select what operations to apply filters = { 'remove_drums' : options.remove_drums ^ options.invert, 'remove_pc' : options.remove_pc ^ options.invert, 'remove_all_text' : options.remove_text ^ options.invert, 'one_track' : options.one_track ^ options.invert, 'remove_tempo' : options.remove_tempo ^ options.invert, 'remove_control' : options.remove_control ^ options.invert, 'one_channel' : options.one_channel ^ options.invert, 'remove_misc_control' : options.remove_misc_control ^ options.invert, 'real_note_offs' : options.real_note_offs ^ options.invert, } print "Filters to be applied:" if options.remove_channels is not None: print " removing channels: %s" % ", ".join(str(ch) for ch in options.remove_channels) if options.resolution is not None: print " changing resolution to %d" % options.resolution print "\n".join(" %s" % 
name for (name,val) in filters.items() if val) filters['remove_duplicates'] = options.remove_duplicates print "Filtering..." # Apply channel filters first if options.remove_channels is not None: remove_channels(mid, options.remove_channels) filtered = simplify(mid, **filters) print "Midi output to",out_filename write_midifile(filtered, out_filename)
def main(): usage = "%prog [options] <midi-file>" description = "Divides a midi file into chunks, with a given size and "\ "offset, and print the chunks consecutively." parser = OptionParser(usage=usage, description=description) parser.add_option('-t', '--time-unit', dest="time_unit", action="store", type="float", help="size of chunks in crotchet beats (according to the midi file's resolution)", default=4) parser.add_option('-o', '--tick-offset', dest="tick_offset", action="store", type="int", help="offset of the first chunk in midi ticks", default=0) parser.add_option('--force-res', dest="force_res", action="store", type="int", help="force the midi file's resolution to be the given value, rather than using that read from the file") parser.add_option('-i', "--tick-times", dest="tick_times", action="store_true", help="show times as tick values, rather than proportions of the chunk") options, arguments = parse_args_with_config(parser) filename = arguments[0] # Load up the input midi file infile = read_midifile(filename, force_resolution=options.force_res) handler = MidiHandler(infile, time_unit=options.time_unit, tick_offset=options.tick_offset) slices = handler.get_slices() print "Printing %d-beat chunks with a %d-tick offset" % (options.time_unit, options.tick_offset) print "Total chunks: %d" % len(slices) print chunk_length = options.time_unit * infile.resolution for i,slc in enumerate(slices): strm = slc.to_event_stream() # Print the header for this chunk print "Chunk %d: %d-%d (%d events)" % \ (i, slc.start, slc.end,len(strm.trackpool)) print "".join(str(i).ljust(2) for i in range(11)), \ "Time ", "Vel", "Ch", "Tr" # Only show note-on events noteons = [ev for ev in sorted(strm.trackpool) \ if type(ev) == NoteOnEvent and ev.velocity > 0] # Sorted by time: within same tick, sort by pitch for k,grp in groupby(noteons): for ev in sorted(list(grp), key=lambda e:e.pitch): # Display all the information for this note octave = ev.pitch / 12 name = 
constants.NOTE_NAMES[ev.pitch % 12].ljust(2) indent = " " * octave fill = " " * (10-octave) if options.tick_times: time = str(ev.tick+slc.start).ljust(7) else: time = ("%.1f%%" % (100.0 * ev.tick / chunk_length)).ljust(7) channel = str(ev.channel).ljust(2) track = str(ev.track).ljust(2) velocity = str(ev.velocity).ljust(3) print "%s%s%s %s %s %s %s" % \ (indent, name, fill, time, velocity, channel, track) print
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a backoff builder model using the given "\ "input data. Specify a model type (ngram, etc) and a name to "\ "identify it. The data file should be a stored SequenceIndex file." parser = OptionParser(usage=usage, description=description) parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.") parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.") # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. 
Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") options, arguments = parse_args_with_config(parser) if len(arguments) < 3: print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_type = arguments[0] model_name = arguments[1] builder_cls = get_backoff_builder(model_type) model_cls = builder_cls.MODEL_CLASS # Load the sequence data from a dbinput file input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=['bulk-db', 'bulk-db-annotated']) # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) if options.partitions is not None: parts = holdout_partition(input_data, options.partitions) models = [(builder_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name,input_data)] for part_name,seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) # Train it with the loaded data model.train(seqs) model.save() print "Trained model %s" % (part_name)
def main(): usage = "%prog [options] <midi-file>" description = "Divides a midi file into chunks, with a given size and "\ "offset, and plays "\ "the chunks consecutively, with a gap between each." parser = OptionParser(usage=usage, description=description) parser.add_option('-t', '--time-unit', dest="time_unit", action="store", type="float", help="size of chunks in crotchet beats (according to the midi file's resolution)", default=4) parser.add_option('-o', '--tick-offset', dest="tick_offset", action="store", type="int", help="offset of the first chunk in midi ticks", default=0) parser.add_option('-g', '--gap', dest="gap", action="store", type="float", help="time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value", default=0.0) parser.add_option('-p', '--print', dest="print_events", action="store_true", help="print out all events for each chunk") parser.add_option('--pno', '--print-note-ons', dest="print_note_ons", action="store_true", help="print only note-on events") parser.add_option('--force-res', dest="force_res", action="store", type="int", help="force the midi file's resolution to be the given value, rather than using that read from the file") parser.add_option('-s', '--start', dest="start", action="store", type="int", help="chunk number to start at", default=0) options, arguments = parse_args_with_config(parser) filename = arguments[0] # Load up the input midi file infile = read_midifile(filename, force_resolution=options.force_res) handler = MidiHandler(infile, time_unit=options.time_unit, tick_offset=options.tick_offset) slices = handler.get_slices() # Start at the requested chunk slices = slices[options.start:] print "Playing %d-beat chunks with a %d-tick offset" % (options.time_unit, options.tick_offset) if options.start > 0: print "Start from chunk %d" % options.start print "Total chunks: %d" % 
len(slices) print "Ctrl+C to exit" print try: for i,slc in enumerate(slices): strm = slc.to_event_stream(cancel_playing=True) print "Playing chunk %d: %d-%d (%d events)" % (i, slc.start, slc.end,len(strm.trackpool)) if options.print_events: print "\n".join(" %s" % ev for ev in sorted(strm.trackpool)) elif options.print_note_ons: print "\n".join(" %s" % ev for ev in sorted(strm.trackpool) \ if type(ev) is NoteOnEvent) play_stream(strm, block=True) if options.gap > 0.0: print " Waiting %s seconds..." % options.gap time.sleep(options.gap) except KeyboardInterrupt: print "Exiting"
def main(): usage = "%prog [<options>] <model-name>" description = "Delete a PCFG model" parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions the model is divided into") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel if len(arguments) == 0: print >> sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >> sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] # First check all the models exist for parti, part_model in parts: if part_model not in PcfgModel.list_models(): print "The model '%s' does not exist" % part_model sys.exit(1) # Now delete them one by one for parti, part_model in parts: # Load the model model = PcfgModel.load_model(part_model) model.delete() print "Removed model: %s" % part_model
def main(): usage = "%prog <model-name>" description = "Debug a PCFG model" parser = OptionParser(usage=usage, description=description) parser.add_option("-g", "--grammar", dest="grammar", action="store", \ help="use the named grammar instead of the default.") parser.add_option("-d", "--debug", dest="debug", action="store_true", \ help="output debugging information during generation") parser.add_option("--file-options", "--fopt", dest="file_options", \ action="store", help="options for the input file "\ "(--file). Type '--fopt help' for a list of available "\ "options.") options, arguments = parse_args_with_config(parser) if len(arguments) < 1: print "Specify a model name" sys.exit(1) model_name = arguments[0] if len(arguments) < 2: print "Specify an input file" grammar = get_grammar(options.grammar) PcfgModel = grammar.formalism.PcfgModel # Load the trained model model = PcfgModel.load_model(model_name) # Try getting a file from the command-line options input_data = command_line_input(filename=arguments[1], filetype="db", options=options.file_options) # Prepare the input and annotations sequence = input_data.sequence categories = [chord.category for chord in sequence.iterator()] str_inputs = input_data.inputs # Build the implicit normal-form tree from the annotations try: tree = build_tree_for_sequence(sequence) except TreeBuildError, err: raise ModelTrainingError, "could not build a tree for '%s': %s" % \ (sequence.string_name, err)
def main(): usage = "%prog [options] <in-file>" description = "Dump a description of all the events in a midi file "\ "to stdout." parser = OptionParser(usage=usage, description=description) options, arguments = parse_args_with_config(parser) if len(arguments) == 0: print "No input MIDI file given" sys.exit(1) filename = arguments[0] # Load the midi file midi = read_midifile(filename) print "Midi file type %d" % midi.format print "Resolution: %d" % midi.resolution for track in range(len(midi)): print "\nTrack %d" % track for event in sorted(midi[track]): print "%s" % (event)
def main(): usage = "%prog [<options>] <model-name>" description = "Delete a PCFG model" parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions the model is divided into") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel if len(arguments) == 0: print >>sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >>sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] # First check all the models exist for parti,part_model in parts: if part_model not in PcfgModel.list_models(): print "The model '%s' does not exist" % part_model sys.exit(1) # Now delete them one by one for parti,part_model in parts: # Load the model model = PcfgModel.load_model(part_model) model.delete() print "Removed model: %s" % part_model
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Loads a chord labeling model and uses it to assign chord "\ "labels to the given MIDI file." parser = OptionParser(usage=usage, description=description) # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='segmidi') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") # Labeling options parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="append", help="options for the labeler. Type '--lopt help' for a list of available options.") parser.add_option("--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)") # Output options parser.add_option("--single", "-1", dest="single", action="store_true", help="show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)") parser.add_option('-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input") parser.add_option('--chords-only', dest="chords_only", action="store_true", help="only realize the chords: don't overlay on the input midi (only works with -r)") options, arguments = parse_args_with_config(parser) if options.labeler_options is not None and "help" in options.labeler_options: print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler") sys.exit(0) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input "\ "(MIDI) data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Process the labeler options lopt_dict = 
ModuleOption.process_option_string(options.labeler_options) if options.single: # No point in getting more than one label, since we only display one lopt_dict['n'] = 1 if options.no_key: # Just set the nokey option lopt_dict['nokey'] = True # Check they're valid before doing anything else HPChordLabeler.process_labeling_options(lopt_dict) input_data = command_line_input(filename, filetype=options.filetype, options=options.file_options, allowed_types=['segmidi','bulk-segmidi']) bulk = not is_bulk_type(type(input_data)) if bulk: input_data = [input_data] for i,data in enumerate(input_data): input_stream = data.stream print "Read midi data in %d segments" % len(data) # Load the model model = HPChordLabeler.load_model(model_name) # Perform labeling labels = model.label(data, options=lopt_dict) # Try labeling as it will be passed to the tagger labs = model.label_lattice(data, options=lopt_dict) if options.single: # Special output for single label output print ", ".join(["%s" % timelabs[0][0] for timelabs in labels]) else: # Print out the labels for each timestep for time,timelabs in enumerate(labels): print "%d: %s" % (time, ", ".join(["%s (%.2e)" % (label,prob) for (label,prob) in timelabs])) if options.realize is not None: # Get the single best chord label for each time best_labels = [timelabs[0][0] for timelabs in labels] # Realize as a midi file print "Realizing output chord sequence" real = ChordSequenceRealizer(best_labels, model.chord_vocab, resolution=input_stream.resolution, chord_length=data.time_unit, text_events=True) if options.chords_only: # Don't overlay stream = real.generate(offset=data.tick_offset) else: stream = real.generate(overlay=input_stream, offset=data.tick_offset) if bulk: filename = "%s-%d" % (options.realize, i) else: filename = options.realize write_midifile(stream, filename)
def main(): usage = "%prog [<options>] <model-name>" description = "Queries an ngram model interactively" optparser = OptionParser(usage=usage, description=description) # Read in command line options and args options, arguments = parse_args_with_config(optparser) if len(arguments) < 1: print "Specify a model name" sys.exit(1) model_name = arguments[0] # Load the ngram model ngmodel = NgramTaggerModel.load_model(model_name) model = ngmodel.model input_getter = input_iterator(">> ") # Load the shell history if possible try: readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE) except IOError: # No history file found. No problem pass print "N-gram model query" print "Loaded", model_name print print "Transition: t <state> <state-1> ... <state-n>" print "Emission: e <chord> <state>" print "State domain: states" print "Emission domain: ems" def _check_state(s): if s not in model.label_dom+[None]: raise QueryError, "invalid state label: %s" % s for query in input_getter: query = query.rstrip("\n").strip() if query: try: if query.startswith("states"): print ", ".join(model.label_dom) elif query.startswith("ems"): print ", ".join(model.emission_dom) elif query.startswith("t"): # Transition prob query states = query.split()[1:] if len(states) != model.order: print "Ngram must have length %d" % model.order continue states = [s if s != "None" else None for s in states] # Verify all these states for state in states: _check_state(state) # Get the transition probability prob = model.transition_probability_debug(*states) print "P(Qi = %s | %s) = %f" % (states[0], ", ".join(["Q(i-%d) = %s" % (i+1,s) for (i,s) in enumerate(states[1:])]), prob) elif query.startswith("e"): # Emission prob query em_state = query.split()[1:] if len(em_state) != 2: print "Emission query must consist of a chord and a state" continue em, state = em_state # Check the state label's valid _check_state(state) # Get the emission probability prob = model.emission_probability(em, state) # Print out the 
probability print "P(Oi = %s | Qi = %s) = %f" % (em, state, prob) else: print "Invalid query: %s" % query except QueryError, err: print "Check your query: %s" % err except Exception, err: print "Error processing query: %s" % err
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a backoff builder model using the given "\ "input data. Specify a model type (ngram, etc) and a name to "\ "identify it. The data file should be a stored SequenceIndex file." parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." 
) options, arguments = parse_args_with_config(parser) if len(arguments) < 3: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_type = arguments[0] model_name = arguments[1] builder_cls = get_backoff_builder(model_type) model_cls = builder_cls.MODEL_CLASS # Load the sequence data from a dbinput file input_data = command_line_input( filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=['bulk-db', 'bulk-db-annotated']) # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) if options.partitions is not None: parts = holdout_partition(input_data, options.partitions) models = [(builder_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name, input_data)] for part_name, seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) # Train it with the loaded data model.train(seqs) model.save() print "Trained model %s" % (part_name)
def main(): usage = "%prog [<options>] <model-name> <training-input>" description = "Training of PCFG models." parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions to divide the data into. "\ "For train, divides the input file, trains a model on each "\ "partition's complement and appends partition number to "\ "the model names. For del, appends partition numbers to model "\ "names and deletes all the models. Recache does similarly. "\ "Has no effect for parse.") parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options" ) parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) if options.debug: log_level = logging.DEBUG else: log_level = logging.WARN # Create a logger for training logger = create_logger(log_level=log_level, name="training", stderr=True) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel # Parse the option string if options.training_opts is None: opts = {} elif options.training_opts.lower() == "help": print options_help_text(PcfgModel.TRAINING_OPTIONS, intro="Training options for PCFGs") sys.exit(0) else: opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), PcfgModel.TRAINING_OPTIONS) if len(arguments) == 0: print >> sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >> sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, 
"%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] if len(arguments) < 2: print >> sys.stderr, "Specify an input file to read sequence data from" sys.exit(1) # Read in the training data from the given file seqs = SequenceIndex.from_file(arguments[1]) if options.partitions is not None: # Prepare each training partition datasets = holdout_partition(seqs.sequences, options.partitions) else: datasets = [seqs.sequences] for dataset, (parti, part_model) in zip(datasets, parts): # Train the named model on the sequence data model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, logger=logger) model.save() print "Trained model", part_model
def main():
    """
    Command-line entry point for running a Jazz Parser supertagger on its
    own.

    Builds the option parser, validates the logging directory, loads the
    grammar and the requested tagger class, and checks the tagger options.

    Returns 0 after printing the list of taggers ('-t help') or a tagger's
    option help ('--topt help'), and 1 if the tagger cannot be loaded or its
    options are invalid. Otherwise the function ends after the option check
    without an explicit return value.

    """
    usage = "%prog [<options>]"
    description = ("Runs a supertagger from the Jazz Parser to tag some input "
                   "but just outputs the results, rather than continuing to "
                   "parse.")
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option(
        "-t", "--tagger", "--supertagger", dest="supertagger",
        action="store",
        help=("run the parser using the named supertagger. Use '-t help' to "
              "see the list of available taggers. Default: %s"
              % settings.DEFAULT_SUPERTAGGER),
        default=settings.DEFAULT_SUPERTAGGER)
    # The help text names '-t', which is the flag this parser defines for
    # selecting the tagger
    optparser.add_option(
        "--topt", "--tagger-options", dest="topts", action="append",
        help=("specify options for the tagger. Type '--topt help', using "
              "'-t <name>' to select a tagger module, to get a list of "
              "options."))
    # Commonly-used misc
    optparser.add_option(
        "-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option(
        "--file", "-f", dest="file", action="store",
        help=("use a file to get parser input from. Use --filetype to "
              "specify the type of the file."))
    optparser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help=("select the file type for the input file (--file). Use "
              "'--filetype help' for a list of available types. "
              "Default: chords"),
        default='chords')
    optparser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help=("options for the input file (--file). Type '--fopt help', "
              "using '--ft <type>' to select file type, for a list of "
              "available options."))
    # Misc options
    optparser.add_option(
        "-v", "--debug", dest="debug", action="store_true",
        help="output verbose debugging information.")
    optparser.add_option(
        "-i", "--interactive", dest="interactive", action="store_true",
        help=("instead of just outputing all tags in one go, wait for user "
              "input between each iteration of adaptive supertagging"))
    # Logging options
    optparser.add_option(
        "--logger", dest="logger", action="store",
        help=("directory to put parser logging in. A filename based on an "
              "identifier for each individual input will be appended."))

    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print("Available taggers are: %s" % ", ".join(TAGGERS))
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        # NOTE(review): `logger` is not defined in this function; presumably
        # it is a module-level logger - confirm
        logger.error("The tagger '%s' could not be loaded. Possible "
                     "taggers are: %s"
                     % (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        # --topt may be repeated, so this is a list of option strings
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print(options_help_text(
                tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger"))
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError as err:
        print("Problem with tagger options (--topt): %s" % err)
        return 1
def main():
    """
    Run a supertagger from the Jazz Parser without going on to parse.

    Steps performed here: define the command-line options, check the
    logging directory if one was given, load the grammar, resolve the
    tagger class and validate any tagger options.

    Returns 0 when only help was requested ('-t help' or '--topt help'),
    1 when the tagger cannot be loaded or its options fail validation.
    When everything validates, the function falls off the end without an
    explicit return value.

    """
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option("-t", "--tagger", "--supertagger",
                         dest="supertagger", action="store",
                         help="run the parser using the named supertagger. "
                              "Use '-t help' to see the list of available "
                              "taggers. Default: %s"
                              % settings.DEFAULT_SUPERTAGGER,
                         default=settings.DEFAULT_SUPERTAGGER)
    # The tagger is selected with '-t' in this script, so that is the flag
    # the help text refers to
    optparser.add_option("--topt", "--tagger-options",
                         dest="topts", action="append",
                         help="specify options for the tagger. Type "
                              "'--topt help', using '-t <name>' to select a "
                              "tagger module, to get a list of options.")
    # Commonly-used misc
    optparser.add_option("-g", "--grammar", dest="grammar", action="store",
                         help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option("--file", "-f", dest="file", action="store",
                         help="use a file to get parser input from. Use "
                              "--filetype to specify the type of the file.")
    optparser.add_option("--filetype", "--ft", dest="filetype",
                         action="store",
                         help="select the file type for the input file "
                              "(--file). Use '--filetype help' for a list of "
                              "available types. Default: chords",
                         default='chords')
    optparser.add_option("--file-options", "--fopt", dest="file_options",
                         action="store",
                         help="options for the input file (--file). Type "
                              "'--fopt help', using '--ft <type>' to select "
                              "file type, for a list of available options.")
    # Misc options
    optparser.add_option("-v", "--debug", dest="debug", action="store_true",
                         help="output verbose debugging information.")
    optparser.add_option("-i", "--interactive", dest="interactive",
                         action="store_true",
                         help="instead of just outputing all tags in one go, "
                              "wait for user input between each iteration of "
                              "adaptive supertagging")
    # Logging options
    optparser.add_option("--logger", dest="logger", action="store",
                         help="directory to put parser logging in. A filename "
                              "based on an identifier for each individual "
                              "input will be appended.")

    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print("Available taggers are: %s" % ", ".join(TAGGERS))
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        # NOTE(review): relies on a `logger` name from outside this
        # function - confirm that the module defines one
        logger.error("The tagger '%s' could not be loaded. Possible "
                     "taggers are: %s" %
                     (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        # action="append" gives a list with one entry per --topt occurrence
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print(options_help_text(
                tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger"))
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError as err:
        print("Problem with tagger options (--topt): %s" % err)
        return 1
def main():
    """
    Command-line entry point: print the note-on events of a midi file,
    chunk by chunk.

    The file is divided into slices by a MidiHandler using the --time-unit
    and --tick-offset options. For each slice a header is printed, followed
    by one line per note-on event with non-zero velocity showing the note
    name, time, velocity, channel and track.

    Exits with status 1 if no midi file is given on the command line.

    """
    usage = "%prog [options] <midi-file>"
    description = "Divides a midi file into chunks, with a given size and "\
        "offset, and print the chunks consecutively."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-t', '--time-unit', dest="time_unit", action="store", type="float",
        help="size of chunks in crotchet beats (according to the midi "
             "file's resolution)",
        default=4)
    parser.add_option(
        '-o', '--tick-offset', dest="tick_offset", action="store",
        type="int", help="offset of the first chunk in midi ticks",
        default=0)
    parser.add_option(
        '--force-res', dest="force_res", action="store", type="int",
        help="force the midi file's resolution to be the given value, "
             "rather than using that read from the file")
    parser.add_option(
        '-i', "--tick-times", dest="tick_times", action="store_true",
        help="show times as tick values, rather than proportions of the "
             "chunk")
    options, arguments = parse_args_with_config(parser)

    # Fail with a message rather than an IndexError traceback
    if len(arguments) == 0:
        sys.stderr.write("No input MIDI file given\n")
        sys.exit(1)
    filename = arguments[0]

    # Load up the input midi file
    infile = read_midifile(filename, force_resolution=options.force_res)
    handler = MidiHandler(infile,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset)
    slices = handler.get_slices()

    # time_unit is a float option: %g shows fractional sizes (e.g. 0.5)
    # correctly and still prints whole numbers without a decimal point
    print("Printing %g-beat chunks with a %d-tick offset" %
          (options.time_unit, options.tick_offset))
    print("Total chunks: %d" % len(slices))
    print("")

    # Length of a chunk in ticks, used to show times as percentages
    chunk_length = options.time_unit * infile.resolution

    for i, slc in enumerate(slices):
        strm = slc.to_event_stream()
        # Print the header for this chunk
        print("Chunk %d: %d-%d (%d events)" %
              (i, slc.start, slc.end, len(strm.trackpool)))
        octave_columns = "".join(str(col).ljust(2) for col in range(11))
        print(" ".join([octave_columns, "Time ", "Vel", "Ch", "Tr"]))
        # Only show note-on events
        noteons = [ev for ev in sorted(strm.trackpool)
                   if type(ev) == NoteOnEvent and ev.velocity > 0]
        # groupby() without a key collects runs of consecutive events that
        # compare equal; each run is then ordered by pitch.
        # NOTE(review): presumably events compare equal when they fall on
        # the same tick - verify against the event class
        for _, grp in groupby(noteons):
            for ev in sorted(grp, key=lambda e: e.pitch):
                # Display all the information for this note
                octave = ev.pitch // 12
                name = constants.NOTE_NAMES[ev.pitch % 12].ljust(2)
                # Pad either side of the name according to the octave
                indent = " " * octave
                fill = " " * (10 - octave)
                if options.tick_times:
                    time = str(ev.tick + slc.start).ljust(7)
                else:
                    time = ("%.1f%%" %
                            (100.0 * ev.tick / chunk_length)).ljust(7)
                channel = str(ev.channel).ljust(2)
                track = str(ev.track).ljust(2)
                velocity = str(ev.velocity).ljust(3)
                print("%s%s%s %s %s %s %s" %
                      (indent, name, fill, time, velocity, channel, track))
        print("")
def main():
    """
    Command-line entry point: train a supertagging model.

    Positional arguments are the model type, the model name and the input
    data file. The model type must be listed in both TRAINABLE_MODELS and
    TAGGERS, and its tagger class must be a ModelTagger subclass. With
    --partitions greater than 1, one model is trained for each partition
    returned by the input data's get_partitions() and named using the
    tagger class's partition_model_name(); otherwise a single model is
    trained on the whole input.

    Exits with status 1 on missing or invalid arguments and with status 0
    after printing the training option help ('--opts help').

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = ("Trains a supertagging model using the given "
                   "input data. Specify a model type (baseline1, etc) and a "
                   "name to identify it. The data file may be a stored "
                   "SequenceIndex file, or any other type of bulk data file. "
                   "This can only be used with the follow types of models: %s"
                   ) % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        # Stop here: there is no arguments[0] to read below
        sys.exit(1)
    model_type = arguments[0]
    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write(
            "'%s' is not a valid model type. Available taggers are: %s\n" %
            (model_type, ", ".join(TRAINABLE_MODELS)))
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write(
            "'%s' isn't a registered model type. Check that "
            "the name in TRAINABLE_MODELS is correct\n" % model_type)
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write(
            "'%s' tagger cannot be trained with this script. Only model "
            "taggers can be.\n" % (tagger_cls.__name__))
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # Only the second element of get_partitions()'s result is used here
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
def main():
    """
    Command-line entry point: print statistics about the notes in a MIDI
    file.

    Prints the file format and resolution, the note counts before and after
    drum removal, and the number of occurrences of each pitch class, most
    frequent first. With --key-profile, the relative pitch class
    frequencies (rotated so that the given key comes first) are written to
    key_profile.csv, a Gnuplot script is written to key_profile.p and
    gnuplot is run on that script.

    Exits with status 1 if no input file is given.

    """
    usage = "%prog [options] <in-file>"
    description = "Print out stats about the notes in a MIDI file"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-k', '--key-profile', dest="key_profile", action="store",
        type="int",
        help="output a graph of the key profile for the given key as a "
             "gnuplot script")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) == 0:
        print("No input MIDI file given")
        sys.exit(1)
    filename = arguments[0]

    # Load the midi file
    midi = read_midifile(filename)
    print("Midi file type %d" % midi.format)
    print("Resolution: %d" % midi.resolution)
    print("%d notes" % len(note_ons(midi)))
    # Get rid of drums
    midi = simplify(midi, remove_drums=True)
    notes = note_ons(midi)
    print("%d non-drum notes" % len(notes))

    # Analyse the note content: count the notes in each pitch class
    pcs = dict.fromkeys(range(12), 0)
    for note in notes:
        pcs[note.pitch % 12] += 1

    note_names = ["C", "C#", "D", "D#", "E", "F",
                  "F#", "G", "G#", "A", "A#", "B"]
    # Print the notes, most frequent first. reversed(sorted(...)) is kept
    # rather than sorted(reverse=True) so the order of ties is unchanged
    for pc, count in reversed(sorted(pcs.items(), key=lambda x: x[1])):
        print("%s: %d" % (note_names[pc], count))

    if options.key_profile is not None:
        kp_output_file = "key_profile"
        pc_names = ["1", "#1/b2", "2", "#2/b3", "3", "4",
                    "#4/b5", "5", "#5/b6", "6", "#6/b7", "7"]
        # Output the pitch counts
        key = options.key_profile
        # Get the pc frequencies, relative to the total number of notes.
        # With no notes at all, every frequency is reported as 0 instead of
        # dividing by zero
        total = sum(pcs.values())
        if total:
            pc_freq = [float(pcs[(key + p) % 12]) / total
                       for p in range(12)]
        else:
            pc_freq = [0.0] * 12
        # Output them to a CSV
        data = "\n".join(
            "%d\t%s\t%f" % (i, name, freq)
            for i, (name, freq) in enumerate(zip(pc_names, pc_freq)))
        with open("%s.csv" % kp_output_file, 'w') as f:
            f.write(data)

        # Output the Gnuplot script, which reads the CSV written above
        gnuplot = """\
set style data lines
set nokey
set xrange [-1:13]
set terminal pdf monochrome
set output "%(name)s.pdf"
set xlabel "Pitch class"
plot "%(name)s.csv" using 1:3:xticlabel(2)
""" % {"name": kp_output_file}
        with open("%s.p" % kp_output_file, 'w') as f:
            f.write(gnuplot)

        # Run Gnuplot
        call(["gnuplot", "%s.p" % kp_output_file])
        print("Gnuplot plot output to %s.p and %s.pdf" %
              (kp_output_file, kp_output_file))
def main():
    """
    Command-line entry point: play segmented midi inputs chunk by chunk.

    Loads the given file as 'bulk-segmidi' input and plays each input's
    chunks in turn, optionally printing the events of each chunk. After each
    song (or when playback is interrupted with Ctrl-C) the user is asked
    whether to continue to the next song, play the same one again or exit.

    Exits with status 1 if no input file is given, and with status 0 when
    the user chooses to exit or when there are no more songs to play.

    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-g', '--gap', dest="gap", action="store", type="float",
        help="time to wait between playing each chunk in seconds "
             "(potentially float). It will take some time to load the chunk "
             "and the sequencer usually pauses before reporting it's done: "
             "this is not included in this value",
        default=0.0)
    parser.add_option(
        '-p', '--print', dest="print_events", action="store_true",
        help="print out all events for each chunk")
    parser.add_option(
        '--pno', '--print-note-ons', dest="print_note_ons",
        action="store_true", help="print only note-on events")
    parser.add_option(
        '--fopt', dest="file_options", action="store",
        help="options for file loading. Use '--fopt help' to see available "
             "options")
    options, arguments = parse_args_with_config(parser)

    # Fail with a message rather than an IndexError traceback
    if len(arguments) == 0:
        sys.stderr.write("You must specify an input file\n")
        sys.exit(1)
    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn. A private sentinel marks the end of the
    # inputs, so running out of songs ends the loop instead of raising
    # StopIteration
    no_more = object()
    input_getter = iter(input_data)
    segmidi = next(input_getter, no_more)
    while segmidi is not no_more:
        print("###############################")
        print("Playing '%s'" % segmidi.name)
        print("%s-beat chunks with a %d-tick offset\n" %
              (segmidi.time_unit, segmidi.tick_offset))
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print("Playing chunk %d: %d events" %
                      (i, len(strm.trackpool)))
                if options.print_events:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)))
                elif options.print_note_ons:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)
                                    if type(ev) is NoteOnEvent))

                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            # Ctrl-C only abandons the current song; go on to the prompt
            pass

        print("Continue to next song (<enter>); exit (x); play again (p)")
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "":
            # Move to next
            segmidi = next(input_getter, no_more)
        elif command != "p":
            # Anything other than "p" is unknown, but has the same effect
            print("Unknown command: %s" % command)
            print("Playing again...")
    sys.exit(0)
def main():
    """
    Play the chunks of each segmented midi input, one song at a time.

    The input file is read as 'bulk-segmidi' data. Every chunk of the
    current song is played (and optionally printed), then the user is
    prompted to move to the next song, replay the current one or exit.
    Ctrl-C during playback skips straight to the prompt.

    Exits with status 1 when no input file is given, and with status 0
    when the user exits or the songs run out.

    """
    usage = "%prog [options] <input>"
    description = ("Divides midi files into chunks, with size and offset, "
                   "given in the input file, and plays "
                   "the chunks consecutively. Input is a segmented bulk "
                   "midi input file.")
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-g', '--gap', dest="gap", action="store",
                      type="float",
                      help="time to wait between playing each chunk in "
                           "seconds (potentially float). It will take some "
                           "time to load the chunk and the sequencer usually "
                           "pauses before reporting it's done: this is not "
                           "included in this value",
                      default=0.0)
    parser.add_option('-p', '--print', dest="print_events",
                      action="store_true",
                      help="print out all events for each chunk")
    parser.add_option('--pno', '--print-note-ons', dest="print_note_ons",
                      action="store_true",
                      help="print only note-on events")
    parser.add_option('--fopt', dest="file_options", action="store",
                      help="options for file loading. Use '--fopt help' to "
                           "see available options")
    options, arguments = parse_args_with_config(parser)

    # Without this check a missing argument shows up as an IndexError
    if not arguments:
        sys.stderr.write("You must specify an input file\n")
        sys.exit(1)
    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn
    input_getter = iter(input_data)
    try:
        segmidi = next(input_getter)
    except StopIteration:
        # Nothing to play at all
        sys.exit(0)

    while True:
        print("###############################")
        print("Playing '%s'" % segmidi.name)
        print("%s-beat chunks with a %d-tick offset\n" %
              (segmidi.time_unit, segmidi.tick_offset))
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print("Playing chunk %d: %d events" %
                      (i, len(strm.trackpool)))
                if options.print_events:
                    print("\n".join(
                        " %s" % ev for ev in sorted(strm.trackpool)))
                elif options.print_note_ons:
                    print("\n".join(
                        " %s" % ev for ev in sorted(strm.trackpool)
                        if type(ev) is NoteOnEvent))

                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            # Interrupting playback just brings up the prompt early
            pass

        print("Continue to next song (<enter>); exit (x); play again (p)")
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "p":
            # Play again
            continue
        elif command == "":
            # Move to next; leave the loop cleanly after the last song
            try:
                segmidi = next(input_getter)
            except StopIteration:
                break
        else:
            print("Unknown command: %s" % command)
            print("Playing again...")
    sys.exit(0)
def main():
    """
    Command-line entry point: label a midi file with a trained Raphsto
    model.

    Positional arguments are the model name and the midi file. The state
    changes produced by the model are printed in the format selected by
    --chords, --analysis or --raphsto (both analysis and chord by default).
    With --realize, the state sequence is additionally realized as a midi
    file written to the given path.

    Exits with status 1 if an argument is missing or the model type is
    unknown.

    """
    usage = "%prog [options] <model-name> <midi-file>"
    description = ("Assigns harmonic labels to a midi file using a trained "
                   "Raphsto model")
    parser = OptionParser(usage=usage, description=description)

    # Option specifications, registered in the order they are listed
    option_specs = [
        (('-t', '--time-unit'), dict(
            dest="time_unit", action="store", type="float",
            help="number of beats to take as the basic unit (chunk size) "
                 "for labelling",
            default=2)),
        (('-o', '--tick-offset'), dict(
            dest="tick_offset", action="store", type="int",
            help="time in midi ticks at which to start labelling",
            default=0)),
        (('-d', '--remove-drums'), dict(
            dest="remove_drums", action="store_true",
            help="ignores any channel 10 events in the midi file",
            default=False)),
        (('-c', '--chords'), dict(
            dest="chords", action="store_true",
            help="displays just chord roots instead of full analysis "
                 "(default: both)")),
        (('-a', '--analysis'), dict(
            dest="analysis", action="store_true",
            help="displays a full analysis without reducing to chord roots "
                 "(default: both)")),
        (('-r', '--realize'), dict(
            dest="realize", action="store",
            help="realize the chord sequence as a midi file (very basic "
                 "and horrible realization)")),
        (('--rands', '--raphsto'), dict(
            dest="raphsto", action="store_true",
            help="displays analysis in the style of the annotations added "
                 "to MIDI files by the original implementation")),
        (('--lyrics',), dict(
            dest="lyrics", action="store_true",
            help="include the chord labels as lyric events in the midi file",
            default=False)),
        (('-m', '--model-type'), dict(
            dest="model_type", action="store",
            help="select a model type: one of %s (default: standard)" %
                 ", ".join(MODEL_TYPES.keys()),
            default="standard")),
    ]
    for flags, spec in option_specs:
        parser.add_option(*flags, **spec)
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input midi "
                         "file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write("Model type must be one of: %s\n" %
                         ", ".join(MODEL_TYPES))
        sys.exit(1)
    # Load the model
    model = MODEL_TYPES[options.model_type].load_model(model_name)

    mid = read_midifile(filename)
    # Number of ticks in one labelling unit
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(mid,
                          time_unit=options.time_unit,
                          tick_offset=options.tick_offset,
                          remove_drums=options.remove_drums)
    # Decode using the model to get a list of (state, time) pairs
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Pick how a single state change is rendered as a line of output
    if options.chords:
        def render(st, tick):
            return "%s (bar %d)" % (format_state_as_chord(st), tick / bar)
    elif options.analysis:
        def render(st, tick):
            return "%s (bar %d)" % (format_state(st), tick / bar)
    elif options.raphsto:
        def render(st, tick):
            return format_state_as_raphsto(st, (tick / bar))
    else:
        def render(st, tick):
            return "%s%s(bar %d)" % (format_state(st).ljust(15),
                                     format_state_as_chord(st).ljust(7),
                                     tick / bar)
    print("\n".join(render(st, tick) for st, tick in state_changes))

    if options.realize is not None:
        # Realize as a midi file
        realizer = ChordSequenceRealizer(states,
                                         resolution=mid.resolution,
                                         times=times,
                                         chord_length=options.time_unit,
                                         text_events=options.lyrics)
        stream = realizer.generate(overlay=mid, offset=options.tick_offset)
        write_midifile(stream, options.realize)
def main():
    """
    Assign harmonic labels to a midi file using a trained Raphsto model.

    Takes the model name and the midi file as positional arguments, prints
    one line per state change in the style selected on the command line
    and, if --realize is given, writes a midi realization of the labelled
    sequence to that path.

    Exits with status 1 when an argument is missing or the model type is
    not one of MODEL_TYPES.

    """
    parser = OptionParser(
        usage="%prog [options] <model-name> <midi-file>",
        description="Assigns harmonic labels to a midi file using a trained "
                    "Raphsto model")
    parser.add_option("-t", "--time-unit", dest="time_unit", action="store",
                      type="float", default=2,
                      help="number of beats to take as the basic unit "
                           "(chunk size) for labelling")
    parser.add_option("-o", "--tick-offset", dest="tick_offset",
                      action="store", type="int", default=0,
                      help="time in midi ticks at which to start labelling")
    parser.add_option("-d", "--remove-drums", dest="remove_drums",
                      action="store_true", default=False,
                      help="ignores any channel 10 events in the midi file")
    parser.add_option("-c", "--chords", dest="chords", action="store_true",
                      help="displays just chord roots instead of full "
                           "analysis (default: both)")
    parser.add_option("-a", "--analysis", dest="analysis",
                      action="store_true",
                      help="displays a full analysis without reducing to "
                           "chord roots (default: both)")
    parser.add_option("-r", "--realize", dest="realize", action="store",
                      help="realize the chord sequence as a midi file (very "
                           "basic and horrible realization)")
    parser.add_option("--rands", "--raphsto", dest="raphsto",
                      action="store_true",
                      help="displays analysis in the style of the "
                           "annotations added to MIDI files by the original "
                           "implementation")
    parser.add_option("--lyrics", dest="lyrics", action="store_true",
                      default=False,
                      help="include the chord labels as lyric events in the "
                           "midi file")
    type_names = ", ".join(MODEL_TYPES.keys())
    parser.add_option("-m", "--model-type", dest="model_type",
                      action="store", default="standard",
                      help="select a model type: one of %s (default: "
                           "standard)" % type_names)
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input midi "
                         "file as arguments\n")
        sys.exit(1)
    model_name = arguments[0]
    filename = os.path.abspath(arguments[1])

    if options.model_type not in MODEL_TYPES:
        sys.stderr.write("Model type must be one of: %s\n" %
                         ", ".join(mt for mt in MODEL_TYPES))
        sys.exit(1)
    model_cls = MODEL_TYPES[options.model_type]
    # Load the model
    model = model_cls.load_model(model_name)

    mid = read_midifile(filename)
    # Ticks per labelling unit; used to turn tick times into bar numbers
    bar = mid.resolution * options.time_unit
    handler = MidiHandler(
        mid,
        time_unit=options.time_unit,
        tick_offset=options.tick_offset,
        remove_drums=options.remove_drums,
    )
    # Decode using the model to get a list of states
    state_changes = model.label(handler)
    states, times = zip(*state_changes)

    # Build the output one line per state change, then print it in one go
    lines = []
    for st, tick in state_changes:
        if options.chords:
            line = "%s (bar %d)" % (format_state_as_chord(st), tick / bar)
        elif options.analysis:
            line = "%s (bar %d)" % (format_state(st), tick / bar)
        elif options.raphsto:
            line = format_state_as_raphsto(st, (tick / bar))
        else:
            # Default: full analysis and chord side by side
            line = "%s%s(bar %d)" % (
                format_state(st).ljust(15),
                format_state_as_chord(st).ljust(7),
                tick / bar,
            )
        lines.append(line)
    print("\n".join(lines))

    if options.realize is None:
        return
    # Realize as a midi file
    real = ChordSequenceRealizer(
        states,
        resolution=mid.resolution,
        times=times,
        chord_length=options.time_unit,
        text_events=options.lyrics,
    )
    stream = real.generate(overlay=mid, offset=options.tick_offset)
    write_midifile(stream, options.realize)
def prepare_evaluation_options(usage=None, description=None,
                               optparse_options=None, check_args=None,
                               optparse_groups=None):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}).

    Builds an option parser with the standard input/partitioning options
    plus any extra options and groups requested, parses the command line,
    initializes logging, loads the sequences from the input file and
    divides them up as requested by C{--partitions}, C{--partition} or
    C{--sequence}.

    @todo: This is not used any more. Remove it, after checking it's
        definitely not used.

    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional
        options to the optparse parser. C{None} (the default) adds none.
    @type check_args: function
    @param check_args: function to take the command-line arguments and
        check them. This will be called early in the script. Must return
        a tuple of (1) the model name (or model basename) that will be
        used in the partition model names and (2) the input filename to
        get sequences from.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the
        optparse option parser. The first of each pair is a tuple of
        args to C{OptionGroup}'s init (excluding the first). The
        second is a list of options each formatted as
        C{optparse_options}. C{None} (the default) adds none.

    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) list of lists containing the sequence
        ids for each partition; (3) optparse options; (4) optparse
        arguments.
    @raise ValueError: if C{check_args} is C{None}. This is raised after
        the command line has been parsed.

    """
    import sys
    # Imported locally like the other dependencies: logging.DEBUG is needed
    # below when --debug is given
    import logging
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition

    # Avoid mutable default arguments: None stands for "nothing extra"
    if optparse_options is None:
        optparse_options = []
    if optparse_groups is None:
        optparse_groups = []

    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input",
                        "Input data and partitioning for evaluation")
    group.add_option(
        "-s", "--sequence", dest="sequence", action="store",
        help="limit the evaluation to just one sequence, with the given "
             "index in the input file")
    group.add_option(
        "--partition", dest="partition", action="store",
        help="restrict to only one partition of the data. Specify as i/n, "
             "where i is the partition number and n the total number of "
             "partitions.")
    group.add_option(
        "-p", "--partitions", dest="partitions", type="int", action="store",
        help="test on all n partitions of the data, using a different "
             "model for each. Will look for a model <NAME>i, where <NAME> "
             "is the given model name and i the partition number.")
    parser.add_option_group(group)

    parser.add_option("--debug", dest="debug", action="store_true",
                      help="show debugging output")

    # Add the options according to their specs
    for args, kwargs in optparse_options:
        parser.add_option(*args, **kwargs)

    # Add groups and their options
    for group_args, group_options in optparse_groups:
        # Check whether the group already exists, so that options can be
        # added to a group that is already on the parser
        same_titles = [g for g in parser.option_groups
                       if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args, kwargs in group_options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)

    if check_args is None:
        raise ValueError("could not check arguments and get model "
                         "name. check_args must not be None")
    model_name, input_filename = check_args(arguments)

    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()

    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)

    def _get_seq_by_index(index):
        # Look up a sequence, exiting with an error if there isn't one
        seq = seqs.sequence_by_index(index)
        if seq is None:
            sys.stderr.write("There are only %d sequences\n" % len(seqs))
            sys.exit(1)
        return seq

    ################ Data partitioning ####################
    # NOTE(review): part_ids does not have the same shape in every branch
    # below (the result of partition(), one element of that result,
    # [seq.id] or [None]) - confirm what callers expect before relying on it
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model
        # name for each
        total_parts = options.partitions
        sys.stderr.write("Cross validation: dividing test data into %d "
                         "partitions\n" % total_parts)
        partitions = [
            (part, "%s%d" % (model_name, i), i)
            for i, part in enumerate(partition(seqs.sequences, total_parts))]
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the argument to get two integers
        parti, total_parts = options.partition.split("/")
        parti, total_parts = int(parti), int(total_parts)
        sys.stderr.write("Restricting sequences to %d-way partition %d\n" %
                         (total_parts, parti))
        # Get a list of sequence indices to restrict our set to
        part_ids = partition(seqs.ids, total_parts)[parti]
        # Build the entries for all partitions, then keep the selected one
        all_partitions = [
            (part, "%s%d" % (model_name, i), i)
            for i, part in enumerate(partition(seqs.sequences, total_parts))]
        partitions = [all_partitions[parti]]
    elif options.sequence is not None:
        # Just select one sequence
        seq = _get_seq_by_index(int(options.sequence))
        partitions = [([seq], model_name, 0)]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name, 0)]
        part_ids = [None]

    return partitions, part_ids, options, arguments
def main(): usage = "%prog [options] <model_name> <input-file>" description = "Trains a model for the RaphSto chord labelling "\ "algorithm on a file that contains a list of midi files with "\ "training options" parser = OptionParser(usage=usage, description=description) parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.") parser.add_option('--opts', dest="opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.") parser.add_option('--proc', '--processes', dest="processes", action="store", type="int", help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)", default=1) parser.add_option('--max-length', dest="max_length", action="store", type="int", help="limits the length of the training midi sequences in chunks") parser.add_option('--split-length', dest="split_length", action="store", type="int", help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot") parser.add_option('--min-length', dest="min_length", action="store", type="int", help="ignores any sequences under this number of chunks. 
This is useful with --split-length, which can leave very short sequences from the end of a split sequence") parser.add_option('--progress-out', dest="progress_out", action="store", help="output logging info to a file instead of the command line") parser.add_option('--init-model', dest="init_model", action="store", help="initialize the model using parameters from an already trained model") parser.add_option('--init-ctrans', dest="init_ctrans", action="store", help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability") parser.add_option('--chord-set', dest="chord_set", action="store", help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used") parser.add_option('-m', '--model-type', dest="model_type", action="store", help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()), default="standard") options, arguments = parse_args_with_config(parser) if options.opts is not None and options.opts == "help": print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs") sys.exit(0) opts = ModuleOption.process_option_string(options.opts) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] print >>sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(' ') # Create a logger to output the progress of the training to stdout or a file if options.progress_out is not None: stdout = False logfile = options.progress_out print >>sys.stderr, "Outputing logging info to %s" % logfile else: stdout = True logfile = None print >>sys.stderr, "Outputing logging to stdout" logger = 
create_logger(name="raphsto_train", filename=logfile, stdout=stdout) logger.info("Raphael and Stoddard HMM model training") if options.model_type not in MODEL_TYPES: print >>sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES) sys.exit(1) model_cls = MODEL_TYPES[options.model_type] if options.chord_set == "help": print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys()) sys.exit(0) elif options.chord_set is not None: # Check this chord set exists if options.chord_set not in constants.CHORD_SETS: print >>sys.stderr, "Chord set '%s' does not exist" % options.chord_set sys.exit(1) else: logger.info("Using chord set '%s'" % options.chord_set) # Read in the training data midis = InputSourceFile(filename) handlers = midis.get_handlers() logger.info("Reading in %d midi files..." % len(midis.inputs)) training_data = [] for i,mh in enumerate(handlers): logger.info("%s: %s" % (i,midis.inputs[i][0])) emissions = mh.get_emission_stream()[0] if options.max_length is not None and len(emissions) > options.max_length: logger.info("Truncating file %d to %d chunks (was %d)" % \ (i,options.max_length,len(emissions))) emissions = emissions[:options.max_length] if options.split_length is not None: logger.info("Splitting sequence %d into sequence no longer "\ "than %d chunks" % (i,options.split_length)) # Split up the sequence if it's too long while len(emissions) > options.split_length: training_data.append(emissions[:options.split_length]) emissions = emissions[options.split_length:] training_data.append(emissions) if options.min_length is not None: # Make sure there are no sequences under the minimum length # Just throw away any that are before_chuck = len(training_data) training_data = [seq for seq in training_data if len(seq) >= options.min_length] if len(training_data) != before_chuck: logger.info("Threw away %d short sequences (below %d chunks)" % \ ((before_chuck-len(training_data)), options.min_length)) logger.info("Training on %d 
sequences. Lengths: %s" % \ (len(training_data), ", ".join(str(len(seq)) for seq in training_data))) if options.partitions is not None: parts = holdout_partition(training_data, options.partitions) models = [("%s%d" % (model_name,num),data) for num,data in enumerate(parts)] else: models = [(model_name,training_data)] # Number of processes to use if options.processes == -1: # Special value: means number of training sequences (one process per sequence) processes = len(training_data) else: processes = options.processes for part_name,data in models: # Instantiate a fresh model with this name logger.info("Training model '%s' on %d midis" % (part_name, len(data))) if options.init_model is not None: logger.info("Initializing using parameters from model '%s'" % \ options.init_model) # Load an already trained model as initialization model = model_cls.initialize_existing_model(options.init_model, \ model_name=part_name) else: # TODO: make these probs an option ctype_params = (0.5, 0.3, 0.2) logger.info("Initializing to naive chord types using parameters: "\ "%s, %s, %s" % ctype_params) init_kwargs = { 'model_name' : part_name } if options.chord_set is not None: # Specify a chord set for the model init_kwargs['chord_set'] = options.chord_set model = model_cls.initialize_chord_types(ctype_params, **init_kwargs) # Initialize the chord transition probabilities if given if options.init_ctrans is not None: logger.info("Initializing chord transition distribution to %s" \ % options.init_ctrans) model.set_chord_transition_probabilities(options.init_ctrans) # Retrain it with the loaded data trainer = model_cls.get_trainer()(model, options=opts) trainer.train(data, logger=logger, processes=processes, save_intermediate=True) print >>sys.stderr, "Training terminating at %s" % datetime.now().isoformat(' ')
def main(): usage = "%prog [<options>] <model-name>" description = "Queries an ngram model interactively" optparser = OptionParser(usage=usage, description=description) # Read in command line options and args options, arguments = parse_args_with_config(optparser) if len(arguments) < 1: print "Specify a model name" sys.exit(1) model_name = arguments[0] # Load the ngram model ngmodel = NgramTaggerModel.load_model(model_name) model = ngmodel.model input_getter = input_iterator(">> ") # Load the shell history if possible try: readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE) except IOError: # No history file found. No problem pass print "N-gram model query" print "Loaded", model_name print print "Transition: t <state> <state-1> ... <state-n>" print "Emission: e <chord> <state>" print "State domain: states" print "Emission domain: ems" def _check_state(s): if s not in model.label_dom + [None]: raise QueryError, "invalid state label: %s" % s for query in input_getter: query = query.rstrip("\n").strip() if query: try: if query.startswith("states"): print ", ".join(model.label_dom) elif query.startswith("ems"): print ", ".join(model.emission_dom) elif query.startswith("t"): # Transition prob query states = query.split()[1:] if len(states) != model.order: print "Ngram must have length %d" % model.order continue states = [s if s != "None" else None for s in states] # Verify all these states for state in states: _check_state(state) # Get the transition probability prob = model.transition_probability_debug(*states) print "P(Qi = %s | %s) = %f" % (states[0], ", ".join([ "Q(i-%d) = %s" % (i + 1, s) for (i, s) in enumerate(states[1:]) ]), prob) elif query.startswith("e"): # Emission prob query em_state = query.split()[1:] if len(em_state) != 2: print "Emission query must consist of a chord and a state" continue em, state = em_state # Check the state label's valid _check_state(state) # Get the emission probability prob = model.emission_probability(em, state) # Print 
out the probability print "P(Oi = %s | Qi = %s) = %f" % (em, state, prob) else: print "Invalid query: %s" % query except QueryError, err: print "Check your query: %s" % err except Exception, err: print "Error processing query: %s" % err
def main(): usage = "%prog [options] <model_name> <input-file>" description = ( "Trains a model for the RaphSto chord labelling " "algorithm on a file that contains a list of midi files with " "training options" ) parser = OptionParser(usage=usage, description=description) parser.add_option( "-p", "--partitions", dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.", ) parser.add_option( "--opts", dest="opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.", ) parser.add_option( "--proc", "--processes", dest="processes", action="store", type="int", help="number of parallel processes to spawn for the training. Use -1 to spawn one per training sequence (after splitting: see split_length)", default=1, ) parser.add_option( "--max-length", dest="max_length", action="store", type="int", help="limits the length of the training midi sequences in chunks", ) parser.add_option( "--split-length", dest="split_length", action="store", type="int", help="limits the length of the training midi sequences in chunks, but instead of throwing away everything after the first N chunks, splits it off as if it were starting a new sequence. This is good for multiprocessing, since many short sequences can be multitasked, whilst few long ones cannot", ) parser.add_option( "--min-length", dest="min_length", action="store", type="int", help="ignores any sequences under this number of chunks. 
This is useful with --split-length, which can leave very short sequences from the end of a split sequence", ) parser.add_option( "--progress-out", dest="progress_out", action="store", help="output logging info to a file instead of the command line", ) parser.add_option( "--init-model", dest="init_model", action="store", help="initialize the model using parameters from an already trained model", ) parser.add_option( "--init-ctrans", dest="init_ctrans", action="store", help="initialize the chord transition distribution using these parameters. Comma-separated list of params given as C0->C1-P, where C0 and C1 are chords (I, II, etc) and P is a float probability", ) parser.add_option( "--chord-set", dest="chord_set", action="store", help="use a chord set other than the default. Use value 'help' to see a list. Has no effect in combination with --init-model, since the old model's chord set will be used", ) parser.add_option( "-m", "--model-type", dest="model_type", action="store", help="select a model type: one of %s (default: standard)" % ", ".join(mt for mt in MODEL_TYPES.keys()), default="standard", ) options, arguments = parse_args_with_config(parser) if options.opts is not None and options.opts == "help": print options_help_text(RaphstoBaumWelchTrainer.OPTIONS, intro="Training options for Raphael and Stoddard HMMs") sys.exit(0) opts = ModuleOption.process_option_string(options.opts) if len(arguments) < 2: print >> sys.stderr, "You must specify a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] print >> sys.stderr, "Raphsto training beginning at %s" % datetime.now().isoformat(" ") # Create a logger to output the progress of the training to stdout or a file if options.progress_out is not None: stdout = False logfile = options.progress_out print >> sys.stderr, "Outputing logging info to %s" % logfile else: stdout = True logfile = None print >> sys.stderr, "Outputing logging to stdout" logger = 
create_logger(name="raphsto_train", filename=logfile, stdout=stdout) logger.info("Raphael and Stoddard HMM model training") if options.model_type not in MODEL_TYPES: print >> sys.stderr, "Model type must be one of: %s" % ", ".join(mt for mt in MODEL_TYPES) sys.exit(1) model_cls = MODEL_TYPES[options.model_type] if options.chord_set == "help": print "Available chord sets: %s" % ", ".join(constants.CHORD_SETS.keys()) sys.exit(0) elif options.chord_set is not None: # Check this chord set exists if options.chord_set not in constants.CHORD_SETS: print >> sys.stderr, "Chord set '%s' does not exist" % options.chord_set sys.exit(1) else: logger.info("Using chord set '%s'" % options.chord_set) # Read in the training data midis = InputSourceFile(filename) handlers = midis.get_handlers() logger.info("Reading in %d midi files..." % len(midis.inputs)) training_data = [] for i, mh in enumerate(handlers): logger.info("%s: %s" % (i, midis.inputs[i][0])) emissions = mh.get_emission_stream()[0] if options.max_length is not None and len(emissions) > options.max_length: logger.info("Truncating file %d to %d chunks (was %d)" % (i, options.max_length, len(emissions))) emissions = emissions[: options.max_length] if options.split_length is not None: logger.info("Splitting sequence %d into sequence no longer " "than %d chunks" % (i, options.split_length)) # Split up the sequence if it's too long while len(emissions) > options.split_length: training_data.append(emissions[: options.split_length]) emissions = emissions[options.split_length :] training_data.append(emissions) if options.min_length is not None: # Make sure there are no sequences under the minimum length # Just throw away any that are before_chuck = len(training_data) training_data = [seq for seq in training_data if len(seq) >= options.min_length] if len(training_data) != before_chuck: logger.info( "Threw away %d short sequences (below %d chunks)" % ((before_chuck - len(training_data)), options.min_length) ) logger.info( 
"Training on %d sequences. Lengths: %s" % (len(training_data), ", ".join(str(len(seq)) for seq in training_data)) ) if options.partitions is not None: parts = holdout_partition(training_data, options.partitions) models = [("%s%d" % (model_name, num), data) for num, data in enumerate(parts)] else: models = [(model_name, training_data)] # Number of processes to use if options.processes == -1: # Special value: means number of training sequences (one process per sequence) processes = len(training_data) else: processes = options.processes for part_name, data in models: # Instantiate a fresh model with this name logger.info("Training model '%s' on %d midis" % (part_name, len(data))) if options.init_model is not None: logger.info("Initializing using parameters from model '%s'" % options.init_model) # Load an already trained model as initialization model = model_cls.initialize_existing_model(options.init_model, model_name=part_name) else: # TODO: make these probs an option ctype_params = (0.5, 0.3, 0.2) logger.info("Initializing to naive chord types using parameters: " "%s, %s, %s" % ctype_params) init_kwargs = {"model_name": part_name} if options.chord_set is not None: # Specify a chord set for the model init_kwargs["chord_set"] = options.chord_set model = model_cls.initialize_chord_types(ctype_params, **init_kwargs) # Initialize the chord transition probabilities if given if options.init_ctrans is not None: logger.info("Initializing chord transition distribution to %s" % options.init_ctrans) model.set_chord_transition_probabilities(options.init_ctrans) # Retrain it with the loaded data trainer = model_cls.get_trainer()(model, options=opts) trainer.train(data, logger=logger, processes=processes, save_intermediate=True) print >> sys.stderr, "Training terminating at %s" % datetime.now().isoformat(" ")
def main(): usage = "%prog [options] <midi-file>" description = "Divides a midi file into chunks, with a given size and "\ "offset, and plays "\ "the chunks consecutively, with a gap between each." parser = OptionParser(usage=usage, description=description) parser.add_option( '-t', '--time-unit', dest="time_unit", action="store", type="float", help= "size of chunks in crotchet beats (according to the midi file's resolution)", default=4) parser.add_option('-o', '--tick-offset', dest="tick_offset", action="store", type="int", help="offset of the first chunk in midi ticks", default=0) parser.add_option( '-g', '--gap', dest="gap", action="store", type="float", help= "time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value", default=0.0) parser.add_option('-p', '--print', dest="print_events", action="store_true", help="print out all events for each chunk") parser.add_option('--pno', '--print-note-ons', dest="print_note_ons", action="store_true", help="print only note-on events") parser.add_option( '--force-res', dest="force_res", action="store", type="int", help= "force the midi file's resolution to be the given value, rather than using that read from the file" ) parser.add_option('-s', '--start', dest="start", action="store", type="int", help="chunk number to start at", default=0) options, arguments = parse_args_with_config(parser) filename = arguments[0] # Load up the input midi file infile = read_midifile(filename, force_resolution=options.force_res) handler = MidiHandler(infile, time_unit=options.time_unit, tick_offset=options.tick_offset) slices = handler.get_slices() # Start at the requested chunk slices = slices[options.start:] print "Playing %d-beat chunks with a %d-tick offset" % ( options.time_unit, options.tick_offset) if options.start > 0: print "Start from chunk %d" % options.start print "Total chunks: %d" 
% len(slices) print "Ctrl+C to exit" print try: for i, slc in enumerate(slices): strm = slc.to_event_stream(cancel_playing=True) print "Playing chunk %d: %d-%d (%d events)" % ( i, slc.start, slc.end, len(strm.trackpool)) if options.print_events: print "\n".join(" %s" % ev for ev in sorted(strm.trackpool)) elif options.print_note_ons: print "\n".join(" %s" % ev for ev in sorted(strm.trackpool) \ if type(ev) is NoteOnEvent) play_stream(strm, block=True) if options.gap > 0.0: print " Waiting %s seconds..." % options.gap time.sleep(options.gap) except KeyboardInterrupt: print "Exiting"
def main(): usage = "%prog [<options>] <model-name> <training-input>" description = "Training of PCFG models." parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions to divide the data into. "\ "For train, divides the input file, trains a model on each "\ "partition's complement and appends partition number to "\ "the model names. For del, appends partition numbers to model "\ "names and deletes all the models. Recache does similarly. "\ "Has no effect for parse.") parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options") parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) if options.debug: log_level = logging.DEBUG else: log_level = logging.WARN # Create a logger for training logger = create_logger(log_level = log_level, name = "training", stderr = True) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel # Parse the option string if options.training_opts is None: opts = {} elif options.training_opts.lower() == "help": print options_help_text(PcfgModel.TRAINING_OPTIONS, intro="Training options for PCFGs") sys.exit(0) else: opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), PcfgModel.TRAINING_OPTIONS) if len(arguments) == 0: print >>sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >>sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, 
"%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] if len(arguments) < 2: print >>sys.stderr, "Specify an input file to read sequence data from" sys.exit(1) # Read in the training data from the given file seqs = SequenceIndex.from_file(arguments[1]) if options.partitions is not None: # Prepare each training partition datasets = holdout_partition(seqs.sequences, options.partitions) else: datasets = [seqs.sequences] for dataset,(parti,part_model) in zip(datasets,parts): # Train the named model on the sequence data model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, logger=logger) model.save() print "Trained model", part_model
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Trains a chord labeling model using the given "\ "input data. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file." parser = OptionParser(usage=usage, description=description) parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.") parser.add_option('--opts', dest="training_opts", action="append", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.") # File input options parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") # Logging output parser.add_option('--log', dest="log", action="store", help="file to output training logs to. 
Specify a base filename; <modelname>.log will be added to the end") options, arguments = parse_args_with_config(parser) grammar = Grammar() # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif "help" in [opt.lower() for opt in options.training_opts]: print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:") sys.exit(0) else: training_opts = ModuleOption.process_option_string(options.training_opts) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names(single=False, bulk=True)) # Only partition the chord data, not the MIDI data if options.partitions is not None and not \ (isinstance(input_data, MidiTaggerTrainingBulkInput) and \ input_data.chords is not None): print >>sys.stderr, "Can only partition chord data and no chord data "\ "was supplied" sys.exit(1) if options.partitions: # The input includes chord training data parts = input_data.chords.get_partitions(options.partitions)[1] models = [("%s%d" % (model_name,num),chord_data) \ for num,chord_data in enumerate(parts)] else: models = [(model_name,None)] for part_name,chord_data in models: if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Create a fresh model with this name model = HPChordLabeler.train(input_data, part_name, logger=logger, options=training_opts, chord_data=chord_data) print "Trained model %s" % (part_name)
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Trains a chord labeling model using the given "\ "input data. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file." parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="append", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." ) # Logging output parser.add_option( '--log', dest="log", action="store", help= "file to output training logs to. 
Specify a base filename; <modelname>.log will be added to the end" ) options, arguments = parse_args_with_config(parser) grammar = Grammar() # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif "help" in [opt.lower() for opt in options.training_opts]: print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:") sys.exit(0) else: training_opts = ModuleOption.process_option_string( options.training_opts) if len(arguments) < 2: print >> sys.stderr, "You must specify a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names( single=False, bulk=True)) # Only partition the chord data, not the MIDI data if options.partitions is not None and not \ (isinstance(input_data, MidiTaggerTrainingBulkInput) and \ input_data.chords is not None): print >>sys.stderr, "Can only partition chord data and no chord data "\ "was supplied" sys.exit(1) if options.partitions: # The input includes chord training data parts = input_data.chords.get_partitions(options.partitions)[1] models = [("%s%d" % (model_name,num),chord_data) \ for num,chord_data in enumerate(parts)] else: models = [(model_name, None)] for part_name, chord_data in models: if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Create a fresh model with this name model = HPChordLabeler.train(input_data, part_name, logger=logger, options=training_opts, chord_data=chord_data) print "Trained model %s" % (part_name)
def main(): usage = "%prog [options] <model_name> <in-file>" description = "Loads a chord labeling model and uses it to assign chord "\ "labels to the given MIDI file." parser = OptionParser(usage=usage, description=description) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='segmidi') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." ) # Labeling options parser.add_option( "--labeler-options", "--lopt", dest="labeler_options", action="append", help= "options for the labeler. Type '--lopt help' for a list of available options." ) parser.add_option( "--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)") # Output options parser.add_option( "--single", "-1", dest="single", action="store_true", help= "show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)" ) parser.add_option( '-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input" ) parser.add_option( '--chords-only', dest="chords_only", action="store_true", help= "only realize the chords: don't overlay on the input midi (only works with -r)" ) options, arguments = parse_args_with_config(parser) if options.labeler_options is not None and "help" in options.labeler_options: print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler") sys.exit(0) if len(arguments) < 2: print >>sys.stderr, "You must specify a model name and an input "\ "(MIDI) data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[1]) model_name = arguments[0] # Process the labeler options lopt_dict = 
ModuleOption.process_option_string(options.labeler_options) if options.single: # No point in getting more than one label, since we only display one lopt_dict['n'] = 1 if options.no_key: # Just set the nokey option lopt_dict['nokey'] = True # Check they're valid before doing anything else HPChordLabeler.process_labeling_options(lopt_dict) input_data = command_line_input(filename, filetype=options.filetype, options=options.file_options, allowed_types=['segmidi', 'bulk-segmidi']) bulk = not is_bulk_type(type(input_data)) if bulk: input_data = [input_data] for i, data in enumerate(input_data): input_stream = data.stream print "Read midi data in %d segments" % len(data) # Load the model model = HPChordLabeler.load_model(model_name) # Perform labeling labels = model.label(data, options=lopt_dict) # Try labeling as it will be passed to the tagger labs = model.label_lattice(data, options=lopt_dict) if options.single: # Special output for single label output print ", ".join(["%s" % timelabs[0][0] for timelabs in labels]) else: # Print out the labels for each timestep for time, timelabs in enumerate(labels): print "%d: %s" % (time, ", ".join([ "%s (%.2e)" % (label, prob) for (label, prob) in timelabs ])) if options.realize is not None: # Get the single best chord label for each time best_labels = [timelabs[0][0] for timelabs in labels] # Realize as a midi file print "Realizing output chord sequence" real = ChordSequenceRealizer(best_labels, model.chord_vocab, resolution=input_stream.resolution, chord_length=data.time_unit, text_events=True) if options.chords_only: # Don't overlay stream = real.generate(offset=data.tick_offset) else: stream = real.generate(overlay=input_stream, offset=data.tick_offset) if bulk: filename = "%s-%d" % (options.realize, i) else: filename = options.realize write_midifile(stream, filename)
def main():
    """
    Command-line entry point for the supertagger model training script.

    Expects three positional arguments: a model type (one of
    TRAINABLE_MODELS), a model name and an input data file. Trains a model
    of the given type on the input data and saves it. With -p/--partitions
    greater than 1, the data is partitioned and one model is trained and
    saved per partition.

    Exits with status 0 after printing the training option help
    ('--opts help') and with status 1 if the arguments are missing or the
    model type is not usable.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a supertagging model using the given "
        "input data. Specify a model type (baseline1, etc) and a name to "
        "identify it. The data file may be a stored SequenceIndex file, or "
        "any other type of bulk data file. "
        "This can only be used with the follow types of models: %s"
    ) % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a model "
             "on the complement of each partition, so it can be tested on "
             "the partition. The models will be named <NAME>n, where <NAME> "
             "is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for a "
             "list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    # NOTE(review): the grammar object is never used below; the call is kept
    # in case constructing it has side effects that training relies on.
    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if not arguments:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        # Previously there was no exit here, so the script carried on and
        # crashed with an IndexError on arguments[0]
        sys.exit(1)
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write(
            "'%s' is not a valid model type. Available taggers are: %s\n" %
            (model_type, ", ".join(TRAINABLE_MODELS)))
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write("'%s' isn't a registered model type. Check that "
                         "the name in TRAINABLE_MODELS is correct\n" %
                         model_type)
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write("'%s' tagger cannot be trained with this script. "
                         "Only model taggers can be.\n" %
                         (tagger_cls.__name__))
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # Train one model per partition, each under its own partition name,
        # using the second element of what get_partitions() returns
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
def main(): set_proc_title("jazzparser") ######################################################## usage = "jazzparser [<options>]" description = "The main parser interface for the Jazz Parser" ## Process the input options optparser = OptionParser(usage=usage, description=description) ### # File input options group = OptionGroup(optparser, "Input", "Input type and location") optparser.add_option_group(group) group.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.") group.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords') group.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.") group.add_option("--index", "--indices", dest="input_index", action="store", help="select individual inputs to process. Specify as a comma-separated list of indices. All inputs are loaded as usual, but only the ith input is processed, for each i in the list") group.add_option("--only-load", dest="only_load", action="store_true", help="don't do anything with the inputs, just load and list them. Handy for checking the inputs load and getting their indices") group.add_option("--partitions", dest="partitions", action="store", type="int", help="divide the input data into this number of partitions and use a different set of models for each. For any parser, tagger and backoff that takes a 'model' argument, the partition number will be appended to the given value") group.add_option("--seq-parts", "--sequence-partitions", dest="sequence_partitions", action="store", help="use a chord sequence index to partition the inputs. 
Input type (bulk) must support association of the inputs with chord sequences by id. Sequences in the given sequence index file are partitioned n ways (--partitions) and the inputs are processed according to their associated sequence.") group.add_option("--continue", "--skip-done", dest="skip_done", action="store_true", help="skip any inputs for which a readable results file already exists. This is useful for continuing a bulk job that was stopped in the middle") ### group = OptionGroup(optparser, "Parser", "Parser, supertagger and backoff parser") optparser.add_option_group(group) group.add_option("-d", "--derivations", dest="derivations", action="store_true", help="keep derivation logs during parse.") group.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") # Parser options group.add_option("-p", "--parser", dest="parser", action="store", help="use the named parser algorithm instead of the default. Use '-p help' to see the list of available parsers. Default: %s" % settings.DEFAULT_PARSER, default=settings.DEFAULT_PARSER) group.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser. Type '--popt help', using '--parser <name>' to select a parser module, to get a list of options.") # Tagger options group.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER) group.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. 
Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.") # Backoff options group.add_option("-b", "--backoff", "--noparse", dest="backoff", action="store", help="use the named backoff model as a backoff if the parser produces no results") group.add_option("--bopt", "--backoff-options", "--backoff-options", "--npo", dest="backoff_opts", action="append", help="specify options for the backoff model. Type '--npo help', using '--backoff <name>' to select a backoff modules, to get a list of options.") ### # Multiprocessing options group = OptionGroup(optparser, "Multiprocessing") optparser.add_option_group(group) group.add_option("--processes", dest="processes", action="store", type="int", help="number of processes to create to perform parses in parallel. Default: 1, i.e. no process pool. Use -1 to create a process for every input", default=1) ### # Output options group = OptionGroup(optparser, "Output") optparser.add_option_group(group) group.add_option("--output", dest="output", action="store", help="directory name to output parse results to. A filename specific to the individual input will be appended to this") group.add_option("--topn", dest="topn", action="store", type="int", help="limit the number of final results to store in the output file to the top n by probability. By default, stores all") group.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.") group.add_option("-a", "--atomic-results", dest="atoms_only", action="store_true", help="only include atomic categories in the results.") group.add_option("-l", "--latex", dest="latex", action="store_true", help="output all results as Latex source. 
Used to produce a whole Latex document, but doesn't any more") group.add_option("--all-times", dest="all_times", action="store_true", help="display all timing information on semantics in output.") group.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.") group.add_option("--time", dest="time", action="store_true", help="time how long the parse takes and output with the results.") group.add_option("--no-results", dest="no_results", action="store_true", help="don't print out the parse results at the end. Obviously you'll want to make sure they're going to a file (--output). This is useful for bulk parse jobs, where the results produce a lot of unnecessary output") group.add_option("--no-progress", dest="no_progress", action="store_true", help="don't output the summary of completed sequences after each one finishes") ### # Output analysis and harmonical group = OptionGroup(optparser, "Output processing", "Output analysis and harmonical") optparser.add_option_group(group) group.add_option("--harmonical", dest="harmonical", action="store", help="use the harmonical to play the chords justly intoned according to the top result and output to a wave file.") group.add_option("--enharmonical", dest="enharmonical", action="store", help="use the harmonical to play the chords in equal temperament and output to a wave file.") group.add_option("--midi", dest="midi", action="store_true", help="generate MIDI files from the harmonical, instead of wave files.") group.add_option("--tempo", dest="tempo", action="store", type=int, help="tempo to use for the generated music (see --harmonical/--enharmonical). 
Default: 120", default=120) group.add_option("--lh-analysis", dest="lh_analysis", action="store_true", help="output the Longuet-Higgins space interpretation of the semantics for each result.") group.add_option("--lh-coordinates", dest="lh_coord", action="store_true", help="like lh-analysis, but displays the coordinates of the points instead of their names.") ### # Logging options group = OptionGroup(optparser, "Logging") optparser.add_option_group(group) group.add_option("--long-progress", dest="long_progress", action="store_true", help="print a summary of the chart so far after each chord/word has been processed.") group.add_option("--progress", "--short-progress", dest="short_progress", action="store_true", help="print a small amount of information out during parsing to indicate progress.") group.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.") ### # Shell options group = OptionGroup(optparser, "Shell", "Interactive shell for inspecting results and parser state") optparser.add_option_group(group) group.add_option("-i", "--interactive", dest="interactive", action="store_true", help="enter interactive mode after parsing.") group.add_option("--error", dest="error_shell", action="store_true", help="catch any errors, report them and then enter the interactive shell. 
This also catches keyboard interrupts, so you can use it to halt parsing and enter the shell.") # Read in command line options and args options, clinput = parse_args_with_config(optparser) ########################### Option processing #################### # Get log level option first, so we can start using the logger if options.debug: log_level = logging.DEBUG else: log_level = logging.INFO # Set up a logger init_logging(log_level) if options.latex: settings.OPTIONS.OUTPUT_LATEX = True if options.logger: # Directory parse_logger_dir = options.logger check_directory(parse_logger_dir) else: parse_logger_dir = None ######## Grammar ######## # Check the grammar actually exists grammar_names = get_grammar_names() if options.grammar is not None and options.grammar not in grammar_names: # This is not a valid grammar name logger.error("The grammar '%s' does not exist. Possible "\ "grammars are: %s." % (options.grammar, ", ".join(grammar_names))) return 1 grammar = get_grammar(options.grammar) ######## Parser ######## # Load the requested parser from jazzparser.parsers import PARSERS if options.parser.lower() == "help": print "Available parsers are: %s" % ", ".join(PARSERS) return 0 try: parser_cls = get_parser(options.parser) except ParserLoadError: logger.error("The parser '%s' could not be loaded. Possible "\ "parsers are: %s" % (options.parser, ", ".join(PARSERS))) return 1 # Get parser options if options.popts is not None: poptstr = options.popts if "help" in [s.strip().lower() for s in poptstr]: # Output this tagger's option help from jazzparser.utils.options import options_help_text print options_help_text(parser_cls.PARSER_OPTIONS, intro="Available options for selected parser") return 0 poptstr = ":".join(poptstr) else: poptstr = "" popts = ModuleOption.process_option_string(poptstr) # Check that the options are valid try: parser_cls.check_options(popts) except ModuleOptionError, err: logger.error("Problem with parser options (--popt): %s" % err) return 1