Example #1
0
def create_naive_training_data():
  '''
  Build the (inputs, targets) pair used to train the most naive RNN model.

  Every loaded MFCC sequence is treated as its own song, and each timestep
  of a sequence gets the same binary row identifying the species that the
  sequence was loaded for.

  Returns tuple (train_seq, targets)
  train_seq: list of the MFCC arrays returned by
             loader.load_training_mfccs, one per entry of that mapping.
             Presumably each is seq_length x input dimension; only
             shape[0] is relied on here.
  targets:   list of int32 np arrays, one per sequence, each of shape
             (seq_length, len(names.SPECIES)) with exactly one column set
             to 1 on every row.
  '''
  num_outputs = len(names.SPECIES)
  mfccs_by_species = loader.load_training_mfccs(names.get_species_list())

  # split the {species: mfcc array} mapping into two parallel lists
  species_names = []
  train_seq_mfccs = []
  for species_name, mfcc in mfccs_by_species.items():
    species_names.append(species_name)
    train_seq_mfccs.append(mfcc)

  # shift each species index down by one so it can be used as a zero-based
  # column (presumably names.get_index_for_species is 1-based -- confirm)
  hot_columns = [names.get_index_for_species(species_name) - 1
                 for species_name in species_names]

  # one all-zero target per sequence, with that species' column set to 1
  targets = []
  for mfcc, hot_column in zip(train_seq_mfccs, hot_columns):
    one_hot = np.zeros((mfcc.shape[0], num_outputs), dtype='int32')
    one_hot[:, hot_column] = 1
    targets.append(one_hot)

  return (train_seq_mfccs, targets)
Example #2
0
def write_species_training_file(
    positive_examples,
    negative_examples,
    model_name,
    opt_training_filename="train.dat"):
  """
    Write a .dat training file for use with svm_hmm_learn.

    positive_examples: a dictionary of { species_name : numpy.ndarray}
        (where arrays are size D x N) representing all of the species to be
        learned. These are written with use_provided_labels=True, i.e. they
        are meant to carry that species' actual label from the names module.

    negative_examples: a dictionary of { species_name : numpy.ndarray }
        (where arrays are size D x N) representing all of the species to be
        treated as noise. These are written with use_provided_labels=False;
        presumably _write_example then labels them with NO_SPECIES --
        confirm against _write_example.

    model_name: a unique string to use to identify this model.

    opt_training_filename: optional filename to use for training file. If not
      specified, defaults to "train.dat"

    The output file is whatever _get_out_file(model_name,
    opt_training_filename) returns; it is expected to live at
    ICML_BIRD_MODEL_PATH/model_name/<opt_training_filename> so that
    svm_hmm_learn can train from it (path handling is in _get_out_file).

    Examples are numbered consecutively starting at 1, positives first and
    then negatives. The file is always closed, even if writing fails.
  """
  out_file = _get_out_file(model_name, opt_training_filename)
  try:
    # Single-argument parenthesised print behaves identically on Python 2
    # and also parses on Python 3.
    print('Writing examples to %s...' % out_file.name)

    example_num = 1

    for species, examples in positive_examples.items():
      _write_example(example_num,
                     examples,
                     out_file,
                     True, # use_provided_labels
                     species)
      example_num += 1

    for species, examples in negative_examples.items():
      _write_example(example_num,
                     examples,
                     out_file,
                     False, # use_provided_labels
                     species)
      example_num += 1

  finally:
    out_file.close()

  print('...done')