def create_naive_training_data():
    '''
    Build the training set for the most naive RNN model.

    Each bird sequence is treated as its own song, and every timestep of a
    sequence carries the same binary (one-hot) encoding of the species the
    sequence was loaded for.

    Returns tuple (train_seq, targets)
        train_seq: list of np arrays, one per training sequence, exactly as
            produced by loader.load_training_mfccs. Each array is
            seq_length x input dimension.
        targets: list of int32 np arrays, one per training sequence. Each
            array is seq_length x output dimension, where the output
            dimension is len(names.SPECIES).
    '''
    num_species = len(names.SPECIES)

    # (species_name, mfcc_array) pairs for every training sequence
    species_mfcc_pairs = loader.load_training_mfccs(
        names.get_species_list()).items()

    train_seq_mfccs = [mfcc for _, mfcc in species_mfcc_pairs]
    species_indices = [names.get_index_for_species(species)
                       for species, _ in species_mfcc_pairs]

    targets = []
    for mfcc, species_index in zip(train_seq_mfccs, species_indices):
        # one row per timestep, one column per species
        one_hot = np.zeros((mfcc.shape[0], num_species), dtype='int32')
        # shift the species index by one so that it indexes from zero
        one_hot[:, species_index - 1] = 1
        targets.append(one_hot)

    return (train_seq_mfccs, targets)
def write_species_training_file(
        positive_examples,
        negative_examples,
        model_name,
        opt_training_filename="train.dat"):
    """
    Write a .dat training file for use with svm_hmm_learn.

    positive_examples: a dictionary of { species_name : numpy.ndarray }
        (where arrays are size D x N) representing all of the species to be
        learned. These are written with use_provided_labels=True, i.e. they
        are meant to be labeled with that species' actual label from the
        names module.
    negative_examples: a dictionary of { species_name : numpy.ndarray }
        (where arrays are size D x N) representing all of the species to be
        treated as noise. These are written with use_provided_labels=False,
        i.e. they are meant to be labeled with the NO_SPECIES label.
    model_name: a unique string to use to identify this model.
    opt_training_filename: optional filename to use for training file. If
        not specified, defaults to "train.dat"

    Creates the directory ICML_BIRD_MODEL_PATH/model_name and creates the
    file ICML_BIRD_MODEL_PATH/model_name/<opt_training_filename> that can be
    used by svm_hmm_learn to train the model. (Directory/file creation is
    delegated to _get_out_file; the actual labeling to _write_example.)

    Examples are numbered consecutively starting at 1: all positive examples
    first, then all negative examples. The output file is always closed,
    even if writing an example fails.
    """
    out_file = _get_out_file(model_name, opt_training_filename)
    try:
        # Single-argument parenthesized print emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Writing examples to %s...' % out_file.name)

        example_num = 1
        # (examples, use_provided_labels): positives keep their own species
        # label, negatives do not.
        # NOTE(review): the NO_SPECIES label for negatives is presumably
        # applied inside _write_example when use_provided_labels is False;
        # the label that used to be looked up here was never passed along.
        example_groups = (
            (positive_examples, True),
            (negative_examples, False),
        )
        for examples, use_provided_labels in example_groups:
            for species in examples:
                _write_example(example_num,
                               examples[species],
                               out_file,
                               use_provided_labels,
                               species)
                example_num += 1
    finally:
        out_file.close()
    print('...done')