def train_truncated_model(self, train_x, train_y, batch_size, num_of_epochs, shuffle_data):
    ### Method 1 ###
    """Train on fixed-length truncated/merged sequences.

    Both inputs and targets are reshaped to 3-D matrices of shape driven by
    self.seq_length / self.merge_size, optionally reordered for stateful
    RNN training, then fitted in a single model.fit call with shuffling off.
    """
    matrix_args = dict(seq_length=self.seq_length,
                       merge_size=self.merge_size,
                       shuffle_data=shuffle_data)
    input_matrix = data_utils.transform_data_to_3d_matrix(train_x, **matrix_args)
    print("Input shape: " + str(input_matrix.shape))
    output_matrix = data_utils.transform_data_to_3d_matrix(train_y, **matrix_args)
    print("Output shape: " + str(output_matrix.shape))
    if self.stateful:
        # Stateful RNNs need batch-aligned ordering, hence no keras shuffling.
        input_matrix, output_matrix = data_utils.get_stateful_data(
            input_matrix, output_matrix, batch_size)
    self.model.fit(input_matrix, output_matrix, batch_size=batch_size,
                   epochs=num_of_epochs, shuffle=False, verbose=1)
def train_bucket_model_without_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    """Train one bucket at a time, where a bucket is the set of utterances
    sharing one exact frame count — so no padding is ever required.

    Each epoch walks the distinct sequence lengths (optionally in a fixed
    seeded shuffle order) and runs a single one-epoch fit per bucket,
    updating a progress bar as files are consumed.
    """
    seq_lengths = list(train_flen['framenum2utt'].keys())
    if shuffle_data:
        random.seed(271638)
        random.shuffle(seq_lengths)
    total_files = len(train_x)
    for epoch in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch + 1, num_of_epochs))
        done = 0
        for seq_len in seq_lengths:
            utt_ids = train_flen['framenum2utt'][seq_len]
            bucket_x = {utt: train_x[utt] for utt in utt_ids}
            bucket_y = {utt: train_y[utt] for utt in utt_ids}
            mat_x = data_utils.transform_data_to_3d_matrix(bucket_x, max_length=seq_len)
            mat_y = data_utils.transform_data_to_3d_matrix(bucket_y, max_length=seq_len)
            self.model.fit(mat_x, mat_y, batch_size=batch_size, epochs=1, verbose=0)
            done += len(utt_ids)
            data_utils.drawProgressBar(done, total_files)
        sys.stdout.write("\n")
def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    """Minibatch training on utterances split/merged to self.seq_length.

    Utterance ids are consumed batch_size at a time (optionally pre-shuffled
    with a fixed seed), transformed to 3-D matrices and fed through
    train_on_batch; validation error is printed after every epoch.
    """
    utt_ids = list(train_flen['utt2framenum'].keys())
    if shuffle_data:
        random.seed(271638)
        random.shuffle(utt_ids)
    total_files = len(utt_ids)
    for epoch in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch + 1, num_of_epochs))
        done = 0
        while done < total_files:
            batch_ids = utt_ids[done:done + batch_size]
            batch_x = {utt: train_x[utt] for utt in batch_ids}
            batch_y = {utt: train_y[utt] for utt in batch_ids}
            mat_x = data_utils.transform_data_to_3d_matrix(
                batch_x, seq_length=self.seq_length, merge_size=self.merge_size)
            mat_y = data_utils.transform_data_to_3d_matrix(
                batch_y, seq_length=self.seq_length, merge_size=self.merge_size)
            self.model.train_on_batch(mat_x, mat_y)
            done += len(batch_ids)
            data_utils.drawProgressBar(done, total_files)
        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_split_model_keras_version(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    """This function is not used as of now: keras-driven variant that fits
    the whole (truncated/merged) data set in one model.fit call."""
    ### Method 3 ###
    to_matrix = data_utils.transform_data_to_3d_matrix
    mat_x = to_matrix(train_x, seq_length=self.seq_length,
                      merge_size=self.merge_size, shuffle_data=shuffle_data)
    print("Input shape: " + str(mat_x.shape))
    mat_y = to_matrix(train_y, seq_length=self.seq_length,
                      merge_size=self.merge_size, shuffle_data=shuffle_data)
    print("Output shape: " + str(mat_y.shape))
    if self.stateful:
        # Reorder so consecutive batches line up for stateful RNN training.
        mat_x, mat_y = data_utils.get_stateful_data(mat_x, mat_y, batch_size)
    self.model.fit(mat_x, mat_y, batch_size=batch_size, epochs=num_of_epochs)
def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 2 ###
    """Bucket utterances by frame count into ranges of width
    self.bucket_range, pad each bucket to the bucket's upper bound, and fit
    one epoch per bucket; validation error is reported after every epoch.
    """
    frame_counts = np.array(list(train_flen['framenum2utt'].keys()))
    bucket_starts = list(range(min(frame_counts), max(frame_counts) + 1, self.bucket_range))
    if shuffle_data:
        random.seed(271638)
        random.shuffle(bucket_starts)
    total_files = len(train_x)
    for epoch in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch + 1, num_of_epochs))
        done = 0
        for lo in bucket_starts:
            hi = lo + self.bucket_range
            # Frame counts falling into the half-open bucket [lo, hi).
            in_bucket = frame_counts[(frame_counts >= lo) & (frame_counts < hi)]
            if len(in_bucket) == 0:
                continue
            utt_ids = []
            for fn in in_bucket:
                utt_ids.extend(train_flen['framenum2utt'][fn])
            bucket_x = {utt: train_x[utt] for utt in utt_ids}
            bucket_y = {utt: train_y[utt] for utt in utt_ids}
            mat_x = data_utils.transform_data_to_3d_matrix(bucket_x, max_length=hi)
            mat_y = data_utils.transform_data_to_3d_matrix(bucket_y, max_length=hi)
            self.model.fit(mat_x, mat_y, batch_size=batch_size, shuffle=False,
                           epochs=1, verbose=0)
            done += len(utt_ids)
            data_utils.drawProgressBar(done, total_files)
        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_bucket_model_without_padding(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    """This function is not used as of now.

    NOTE(review): a method with the same name but a shorter signature also
    exists in this module; in a single class the later definition wins.
    """
    ### Method 4 ###
    length_keys = list(train_flen['framenum2utt'].keys())
    if shuffle_data:
        random.seed(271638)
        random.shuffle(length_keys)
    file_total = len(train_x)
    for ep in range(num_of_epochs):
        print('Epoch: %d/%d ' % (ep + 1, num_of_epochs))
        processed = 0
        for length in length_keys:
            # All utterances of this exact length: no padding needed.
            names = train_flen['framenum2utt'][length]
            x_subset = dict((name, train_x[name]) for name in names)
            y_subset = dict((name, train_y[name]) for name in names)
            x_mat = data_utils.transform_data_to_3d_matrix(x_subset, max_length=length)
            y_mat = data_utils.transform_data_to_3d_matrix(y_subset, max_length=length)
            self.model.fit(x_mat, y_mat, batch_size=batch_size, epochs=1, verbose=0)
            processed += len(names)
            data_utils.drawProgressBar(processed, file_total)
        sys.stdout.write("\n")
def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    """Bucket utterances into frame-count ranges of width self.bucket_range,
    pad each bucket to its upper bound, and fit one epoch per bucket.

    Fixes (consistent with train_bucket_model elsewhere in this class):
    - Python 2-only constructs replaced: ``print`` statement, ``xrange``,
      shuffling a lazy ``range`` object (TypeError in Python 3), and
      ``np.array(dict.keys())`` which yields a useless 0-d object array.
    - Bucket boundaries corrected to half-open [lo, lo+bucket_range):
      the old ``range(min, max)`` dropped the top bucket and the strict
      ``> min_seq_length`` test skipped files at a bucket's lower bound.
    """
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list),
                                  max(train_fnum_list) + 1,
                                  self.bucket_range))
    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)
    train_file_number = len(train_x)
    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list >= min_seq_length)
                                             & (train_fnum_list < max_seq_length)]
            if len(sub_train_list) == 0:
                continue
            # Flatten the per-frame-count utterance lists of this bucket.
            train_idx_list = sum([train_flen['framenum2utt'][framenum]
                                  for framenum in sub_train_list], [])
            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)
            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)
            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size,
                           epochs=1, verbose=0)
            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)
        sys.stdout.write("\n")
def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 2 ###
    """Bucket utterances into frame-count ranges of width self.bucket_range,
    pad each bucket to its upper bound, and fit one epoch per bucket.

    Fixes (consistent with train_bucket_model elsewhere in this class):
    - Python 2-only constructs replaced: ``xrange`` (NameError in Python 3),
      shuffling a lazy ``range`` object (TypeError in Python 3), and
      ``np.array(dict.keys())`` which yields a useless 0-d object array.
    - Bucket boundaries corrected to half-open [lo, lo+bucket_range):
      the old ``range(min, max)`` dropped the top bucket and the strict
      ``> min_seq_length`` test skipped files at a bucket's lower bound.
    """
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list),
                                  max(train_fnum_list) + 1,
                                  self.bucket_range))
    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)
    train_file_number = len(train_x)
    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list >= min_seq_length)
                                             & (train_fnum_list < max_seq_length)]
            if len(sub_train_list) == 0:
                continue
            # Flatten the per-frame-count utterance lists of this bucket.
            train_idx_list = sum([train_flen['framenum2utt'][framenum]
                                  for framenum in sub_train_list], [])
            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)
            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)
            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size,
                           epochs=1, verbose=0)
            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)
        sys.stdout.write("\n")
def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen):
    # TODO: use packaged params
    ### Method 2 ###
    """Bucket-and-pad training driven by attributes on self
    (batch_size, num_of_epochs, shuffle_data, bucket_range).

    Utterances whose frame counts fall into the same half-open range
    [lo, lo + bucket_range) are padded to the range's upper bound and
    fitted together for one epoch; validation error follows each epoch.
    """
    fnum_values = np.array(list(train_flen['framenum2utt'].keys()))
    range_lows = list(range(min(fnum_values), max(fnum_values) + 1, self.bucket_range))
    if self.shuffle_data:
        random.seed(271638)
        random.shuffle(range_lows)
    n_files = len(train_x)
    for ep in range(self.num_of_epochs):
        print('Epoch: %d/%d ' % (ep + 1, self.num_of_epochs))
        count = 0
        for lower in range_lows:
            upper = lower + self.bucket_range
            mask = (fnum_values >= lower) & (fnum_values < upper)
            picked = fnum_values[mask]
            if len(picked) == 0:
                continue
            names = sum([train_flen['framenum2utt'][fn] for fn in picked], [])
            xs = {name: train_x[name] for name in names}
            ys = {name: train_y[name] for name in names}
            padded_x = data_utils.transform_data_to_3d_matrix(xs, max_length=upper)
            padded_y = data_utils.transform_data_to_3d_matrix(ys, max_length=upper)
            self.model.fit(padded_x, padded_y, batch_size=self.batch_size,
                           shuffle=False, epochs=1, verbose=0)
            count += len(names)
            data_utils.drawProgressBar(count, n_files)
        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_padding_model(self, train_x, train_y, valid_x, valid_y, train_flen):
    # TODO: use packaged params
    ### Method 1 ###
    """Minibatch training where each batch is padded to the longest utterance
    it contains; reads batch_size / num_of_epochs / shuffle_data from self.
    """
    utt_ids = list(train_flen['utt2framenum'].keys())
    if self.shuffle_data:
        random.seed(271638)
        random.shuffle(utt_ids)
    total_files = len(utt_ids)
    for epoch in range(self.num_of_epochs):
        print('Epoch: %d/%d ' % (epoch + 1, self.num_of_epochs))
        done = 0
        while done < total_files:
            batch_ids = utt_ids[done:done + self.batch_size]
            # Pad only up to the longest file of this batch, not globally.
            pad_len = max(train_flen['utt2framenum'][utt] for utt in batch_ids)
            batch_x = {utt: train_x[utt] for utt in batch_ids}
            batch_y = {utt: train_y[utt] for utt in batch_ids}
            mat_x = data_utils.transform_data_to_3d_matrix(batch_x, max_length=pad_len)
            mat_y = data_utils.transform_data_to_3d_matrix(batch_y, max_length=pad_len)
            self.model.train_on_batch(mat_x, mat_y)
            done += len(batch_ids)
            data_utils.drawProgressBar(done, total_files)
        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_truncated_model(self, train_x, train_y, batch_size, num_of_epochs, shuffle_data):
    ### Method 1 ###
    """Train on fixed-length truncated/merged sequences (Method 1).

    Reshapes inputs and targets to 3-D matrices using self.seq_length and
    self.merge_size, optionally reorders them for stateful RNN training,
    then runs a single model.fit with keras shuffling disabled.

    Fix: ``get_stateful_data`` was called unqualified, raising NameError
    whenever self.stateful is True; every sibling method calls it as
    ``data_utils.get_stateful_data``, so qualify it the same way here.
    """
    temp_train_x = data_utils.transform_data_to_3d_matrix(
        train_x, seq_length=self.seq_length, merge_size=self.merge_size,
        shuffle_data=shuffle_data)
    print("Input shape: " + str(temp_train_x.shape))
    temp_train_y = data_utils.transform_data_to_3d_matrix(
        train_y, seq_length=self.seq_length, merge_size=self.merge_size,
        shuffle_data=shuffle_data)
    print("Output shape: " + str(temp_train_y.shape))
    if self.stateful:
        temp_train_x, temp_train_y = data_utils.get_stateful_data(
            temp_train_x, temp_train_y, batch_size)
    self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size,
                   epochs=num_of_epochs, shuffle=False, verbose=1)
def train_split_model_keras_version(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    """This function is not used as of now """
    ### Method 3 ###
    # Build the 3-D input/output matrices once, then let keras drive epochs.
    shared_kwargs = {'seq_length': self.seq_length,
                     'merge_size': self.merge_size,
                     'shuffle_data': shuffle_data}
    x_matrix = data_utils.transform_data_to_3d_matrix(train_x, **shared_kwargs)
    print("Input shape: " + str(x_matrix.shape))
    y_matrix = data_utils.transform_data_to_3d_matrix(train_y, **shared_kwargs)
    print("Output shape: " + str(y_matrix.shape))
    if self.stateful:
        x_matrix, y_matrix = data_utils.get_stateful_data(x_matrix, y_matrix, batch_size)
    self.model.fit(x_matrix, y_matrix, batch_size=batch_size, epochs=num_of_epochs)
def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    """Minibatch training via train_on_batch on utterances split/merged to
    self.seq_length, with per-epoch validation-error reporting."""
    id_list = list(train_flen['utt2framenum'].keys())
    if shuffle_data:
        random.seed(271638)
        random.shuffle(id_list)
    n_total = len(id_list)
    for ep in range(num_of_epochs):
        print('Epoch: %d/%d ' % (ep + 1, num_of_epochs))
        for start in range(0, n_total, batch_size):
            chunk = id_list[start:start + batch_size]
            chunk_x = dict((name, train_x[name]) for name in chunk)
            chunk_y = dict((name, train_y[name]) for name in chunk)
            x3d = data_utils.transform_data_to_3d_matrix(
                chunk_x, seq_length=self.seq_length, merge_size=self.merge_size)
            y3d = data_utils.transform_data_to_3d_matrix(
                chunk_y, seq_length=self.seq_length, merge_size=self.merge_size)
            self.model.train_on_batch(x3d, y3d)
            data_utils.drawProgressBar(start + len(chunk), n_total)
        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))