def main(): usage = "%prog [options] <seq-file> <index>" description = "Outputs the key associated with each chord of a sequence "\ "from an annotated corpus" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) print keys_for_sequence(seq)
def train(data, name, logger=None, options=None, chord_data=None):
    """
    Initializes and trains an HMM in a supervised fashion using the given
    training data.

    @param data: bulk training input; either midi tagger training data
        (possibly carrying a chord corpus in C{data.chords}) or chord
        corpus data (C{DbBulkInput})
    @param name: name to give the trained model
    @param logger: optional logger; a dummy logger is created if None
    @param options: dict of training options, passed through
        C{HPChordLabeler.process_training_options}. Defaults to an empty
        dict. (Previously a mutable default C{{}} was used, which is
        shared between calls and risks cross-call mutation.)
    @param chord_data: explicit chord corpus data, overriding any chords
        included in C{data}
    @return: the trained C{HPChordLabeler} model
    @raise ModelTrainError: if the training data set is empty
    """
    if len(data) == 0:
        raise ModelTrainError("empty training data set")

    # Prepare a dummy logger if none was given
    if logger is None:
        logger = create_dummy_logger()

    # Process the options dict (avoid mutable default: use a fresh dict)
    if options is None:
        options = {}
    options = HPChordLabeler.process_training_options(options)

    # Work out what kind of input data we've got
    # It should be a bulk input type: check what type the first input is
    input_type = detect_input_type(data[0], allowed=['segmidi', 'db-annotated'])

    logger.info(">>> Beginning training of HP chord labeler model '%s'" % name)

    # If we got midi tagger training data, it may include chord data as well
    if isinstance(data, MidiTaggerTrainingBulkInput) and \
            data.chords is not None:
        if chord_data is None:
            # Use the chord data in the input data
            logger.info("Midi training data; chord corpus data available")
            chord_inputs = data.chords
        else:
            # Use the chord data that was given explicitly
            chord_inputs = chord_data
        midi_inputs = data
    elif isinstance(data, DbBulkInput):
        logger.info("Only chord corpus training data")
        # This was only chord input, no midi data
        chord_inputs = data
        midi_inputs = None
    else:
        chord_inputs = chord_data
        # Presumably this is another form of midi training data
        midi_inputs = data
        logger.info("Midi training data; no chord data was included")

    # Get the chord vocab from the options
    logger.info("Model chord vocabulary: %s" % options['vocab'])
    vocab, vocab_mapping = CHORD_VOCABS[options['vocab']]

    # Initialize a model according to the chord types
    logger.info("Initializing emission distributions to favour chord "\
        "notes with chord probability %s" % (options['chordprob']))
    model = HPChordLabeler.initialize_chords(options['chordprob'], \
                                options['maxnotes'], vocab, \
                                vocab_mapping, name=name)

    # If we have chord training data, use this to train the transition dist
    if chord_inputs is not None:
        logger.info("Training using chord data")
        # Construct the trees implicit in the annotations to get the
        # key of every chord
        logger.info("Preparing key data for annotated chord sequences")
        input_keys = [keys_for_sequence(dbinput) for dbinput in chord_inputs]
        # Run the supervised training of the transition distribution
        logger.info("Training transition distribution on chord sequences")
        model.train_transition_distribution(chord_inputs, input_keys)

    if midi_inputs is not None:
        logger.info("Training using midi data")
        # Preprocess the midi inputs so they're ready for the model training
        emissions = [midi_to_emission_stream(seq, remove_empty=False)[0] \
                        for seq in midi_inputs]
        # Use the midi data to train emission number dist
        logger.info("Training emission number distribution")
        model.train_emission_number_distribution(emissions)

        ####### EM unsupervised training on the midi data
        # Pull out the options to pass to the trainer
        # These are a subset of the model training options
        bw_opt_names = [opt.name for opt in HPBaumWelchTrainer.OPTIONS]
        bw_opts = dict([(name,val) for (name,val) in options.items() \
                            if name in bw_opt_names])
        # Create a Baum-Welch trainer
        trainer = HPBaumWelchTrainer(model, bw_opts)
        # Do the Baum-Welch training
        model = trainer.train(emissions, logger=logger)

    logger.info("Training complete")
    return model