def create_naive_training_data():
    '''
    Build the training set for the most naive RNN model.

    Every bird sequence is treated as its own song, and every timestep of a
    sequence carries the same binary encoding of the species associated
    with that sequence.

    Returns a tuple (train_seq, targets):
      train_seq: list with one entry per item returned by
                 loader.load_training_mfccs, in the loader's iteration
                 order. Each entry is expected to be an np array of size
                 seq_length x input dimension.
      targets:   list of int32 np arrays, one per sequence, each of size
                 seq_length x len(names.SPECIES), holding 1 in the column
                 of the sequence's species and 0 everywhere else.
    '''
    num_outputs = len(names.SPECIES)

    # Materialise the (species_name, mfcc_array) pairs once so that the
    # sequences and their labels are derived from the same ordering.
    species_mfcc_pairs = list(
        loader.load_training_mfccs(names.get_species_list()).items())
    sequences = [mfcc for _, mfcc in species_mfcc_pairs]

    # Output column to switch on for each sequence. The value returned by
    # names.get_index_for_species is shifted down by one to index from zero.
    hot_columns = [names.get_index_for_species(species_name) - 1
                   for species_name, _ in species_mfcc_pairs]

    # Start from all-zero targets, one row per timestep of the sequence.
    targets = [np.zeros((mfcc.shape[0], num_outputs), dtype='int32')
               for mfcc in sequences]

    # Mark the species column on every timestep of its sequence.
    for target, column in zip(targets, hot_columns):
        target[:, column] = 1

    return (sequences, targets)
def load_training_data():
    '''
    Load the training MFCC data for every species in
    names.get_species_list().

    Returns whatever loader.load_training_mfccs produces, unmodified. This
    has been documented as a dict of the form
        {species_name: {'labels': [label_1, label_2, ...],
                        'samples': np.array}}
    where 'labels' is of length num_frames and 'samples' is of size
    (num_frames x d).
    NOTE(review): the structure is determined by the loader; confirm it
    against loader.load_training_mfccs.
    '''
    return loader.load_training_mfccs(names.get_species_list())
"""
make_model_two.py - Script that writes the svm_hmm_learn training files
used to build model two as described in todo.md (and, in the future, the
testing files for svm_hmm_classify).

One training file named train_<species>.dat is written per species: that
species' data forms the positive examples and the data of every other
species forms the negative examples.
"""

from env import validator
from input import loader, names
import numpy as np
import output.svm

if __name__ == '__main__':
    validator.check_environs()

    species_list = names.get_species_list()
    mfccs_by_species = loader.load_training_mfccs(species_list)

    for target_species in species_list:
        # The species under consideration is the only positive class.
        positives = {target_species: mfccs_by_species[target_species]}

        # Every remaining species contributes its data as negatives.
        negatives = {
            candidate: mfccs_by_species[candidate]
            for candidate in species_list
            if candidate != target_species
        }

        output.svm.write_species_training_file(
            positives,
            negatives,
            'two',
            "train_%s.dat" % target_species)