Ejemplo n.º 1
0
def create_naive_training_data():
  '''
  Build inputs and targets for the most naive RNN model: every training
  sequence is treated as its own song, and every timestep of a sequence
  carries the same one-hot encoding of the species it was loaded under.

  Returns tuple (train_seq, targets)
  train_seq: list of np arrays, one per training sequence, exactly as
             handed back by the loader (expected to be
             seq_length x input dimension).
  targets:   list of int32 np arrays, one per training sequence, each of
             shape seq_length x output dimension, where output dimension
             is len(names.SPECIES). A single column is set to 1 in every
             row; all other entries are 0.
  '''
  num_outputs = len(names.SPECIES)

  # (species, mfcc array) pairs, one pair per training sequence
  labelled_seqs = loader.load_training_mfccs(names.get_species_list()).items()

  sequences = [pair[1] for pair in labelled_seqs]
  species_indices = [names.get_index_for_species(pair[0])
      for pair in labelled_seqs]

  targets = []
  for mfcc, species_index in zip(sequences, species_indices):
    one_hot = np.zeros((mfcc.shape[0], num_outputs), dtype='int32')
    # shift by one to index from zero -- presumably get_index_for_species
    # is 1-based; verify against the names module
    one_hot[:, species_index - 1] = 1
    targets.append(one_hot)

  return (sequences, targets)
Ejemplo n.º 2
0
def load_training_data():
  '''
  Load the training MFCCs for every species in names.get_species_list()
  and hand back the loader's result unchanged.

  Returns a dict
    {species_name : { 'labels' : [label_1, label_2,...],
                      'samples' : np.array } }
  The labels list is of length num_frames
  The samples array is of size (num_frames x d)
  (layout as produced by loader.load_training_mfccs -- confirm there)
  '''
  return loader.load_training_mfccs(names.get_species_list())
Ejemplo n.º 3
0
"""
make_model_two.py - A script to create the training files for svm_hmm_learn
    (and in the future, the testing files for svm_hmm_classify) to build model
    two as described in todo.md.
"""
from env import validator
from input import loader, names
import numpy as np
import output.svm

if __name__ == '__main__':
  validator.check_environs()

  all_species = names.get_species_list()
  mfccs_by_species = loader.load_training_mfccs(all_species)

  # One training file per species: that species' data goes in as the
  # positive examples, every other species' data as the negative examples.
  for target in all_species:
    positives = {target: mfccs_by_species[target]}
    negatives = {
        other: mfccs_by_species[other]
        for other in all_species
        if other != target
    }

    output.svm.write_species_training_file(
        positives, negatives, 'two', "train_%s.dat" % target)