def generate_train(self):
    '''Yield training mini-batches indefinitely.

    Steps through self.train_audio_indexes in windows of self.batch_size.
    When self.pointer has reached the end of the index list, it is reset
    to 0 and the list is reshuffled in place with self.random_state. The
    last window before a reshuffle may hold fewer than self.batch_size
    entries.

    Yields:
      batch_data_dict: dict with keys 'filename', 'feature' and 'target'.
        'target' is the result of sparse_to_categorical applied to the
        stored targets with self.in_domain_classes_num.
    '''
    while True:
        # Index list exhausted: rewind and reshuffle before the next draw
        if self.pointer >= len(self.train_audio_indexes):
            self.pointer = 0
            self.random_state.shuffle(self.train_audio_indexes)

        # Cut the next window of indexes and advance the shared pointer
        begin = self.pointer
        end = begin + self.batch_size
        selected = self.train_audio_indexes[begin:end]
        self.pointer = end

        data = self.data_dict

        # Features are passed through as stored; self.transform is not
        # applied here.
        yield {
            'filename': data['filename'][selected],
            'feature': data['feature'][selected],
            'target': sparse_to_categorical(
                data['target'][selected], self.in_domain_classes_num),
        }
def generate_validate(self, data_type, max_iteration=None):
    '''Generate mini-batch data for evaluation in a single ordered pass.

    Unlike generate_train, the indexes are not shuffled and the generator
    stops once every index has been visited or max_iteration batches have
    been produced, whichever comes first. The last batch may hold fewer
    than self.batch_size entries.

    Args:
      data_type: 'train' | 'validate', selects self.train_audio_indexes
        or self.validate_audio_indexes
      max_iteration: int or None, maximum number of batches to yield, to
        speed up validation; None means no limit

    Yields:
      batch_data_dict: dict with keys 'filename', 'feature', 'start',
        'end' and 'target'. 'target' is the result of
        sparse_to_categorical applied to the stored targets with
        self.in_domain_classes_num.

    Raises:
      Exception: if data_type is neither 'train' nor 'validate'. Because
        this is a generator, it is raised when the first batch is
        requested, not when generate_validate is called.
    '''
    batch_size = self.batch_size

    if data_type == 'train':
        audio_indexes = np.array(self.train_audio_indexes)
    elif data_type == 'validate':
        audio_indexes = np.array(self.validate_audio_indexes)
    else:
        raise Exception('Incorrect argument!')

    iteration = 0
    pointer = 0

    # Stop at the end of the data or once the iteration budget is spent.
    # With max_iteration=None the second test is always true.
    while pointer < len(audio_indexes) and iteration != max_iteration:
        # Get batch audio_indexes
        batch_audio_indexes = audio_indexes[pointer:pointer + batch_size]
        pointer += batch_size
        iteration += 1

        batch_data_dict = {}

        batch_data_dict['filename'] = \
            self.data_dict['filename'][batch_audio_indexes]

        batch_data_dict['feature'] = \
            self.data_dict['feature'][batch_audio_indexes]

        batch_data_dict['start'] = \
            self.data_dict['start'][batch_audio_indexes]

        batch_data_dict['end'] = \
            self.data_dict['end'][batch_audio_indexes]

        # Targets are looked up one index at a time and handed over as a
        # list. NOTE(review): it is not visible from here whether
        # data_dict['target'] supports array indexing for this data, so
        # the per-item lookup is kept.
        targets = self.data_dict['target']
        sparse_target = [targets[index] for index in batch_audio_indexes]

        batch_data_dict['target'] = sparse_to_categorical(
            sparse_target, self.in_domain_classes_num)

        yield batch_data_dict