def train(self, epoch, data, spklist, aux_data=None):
    """Train the network for one epoch.

    Args:
        epoch: The current epoch index, used for logging and checkpointing.
        data: The training data directory.
        spklist: A file mapping speaker names to indices.
        aux_data: The auxiliary data directory (may be useful in a child class).
    """
    self.network.train()
    curr_step = 0

    # The data loader feeds random speaker/segment batches from the Kaldi data directory.
    data_loader = KaldiDataRandomQueue(data, spklist,
                                       num_parallel=self.params.num_parallel_datasets,
                                       max_qsize=self.params.max_queue_size,
                                       num_speakers=self.params.num_speakers_per_batch,
                                       num_segments=self.params.num_segments_per_speaker,
                                       min_len=self.params.min_segment_len,
                                       max_len=self.params.max_segment_len,
                                       shuffle=True)
    data_loader.start()

    sum_loss, sum_samples = 0, 0
    for step in range(curr_step % self.params.num_steps_per_epoch,
                      self.params.num_steps_per_epoch):
        features, labels = data_loader.fetch()
        sum_samples += len(features)
        features, labels = self.transform(features, labels)
        out, _ = self.network(features)
        torch.cuda.empty_cache()
        loss = self.loss_network(out, labels)
        sum_loss += loss.item() * len(features)

        # Log the running average loss every `show_training_process` steps.
        if step % self.params.show_training_process == 0:
            with open(os.path.join(self.model_log, "iter_loss_log"), 'a') as iter_f:
                iter_f.write("Time:{}, Epoch:{}, Iter:{}, Loss:{}\n".format(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                    epoch, step, sum_loss / sum_samples))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        curr_step += 1

    with open(os.path.join(self.model_log, "epoch_loss_log"), 'a') as epoch_f:
        epoch_f.write("Time:{}, Epoch:{}, Loss:{}\n".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
            epoch, sum_loss / sum_samples))

    self.save(epoch=epoch, model=self.network, optimizer=self.optimizer)
    data_loader.stop()
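# A minimal driver sketch for the PyTorch `train` method above, assuming a
# `trainer` object exposing it; `num_epochs` is a hypothetical knob. Only the
# train(epoch, data, spklist) signature comes from the method itself.
def run_epochs(trainer, data, spklist, num_epochs=10):
    for epoch in range(num_epochs):
        # One pass of num_steps_per_epoch batches; the method logs the epoch
        # loss and saves a checkpoint itself.
        trainer.train(epoch, data, spklist)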
def train_tune_lr(self, data, spklist, tune_period=100, aux_data=None):
    """Tune the learning rate.

    According to: https://www.kdnuggets.com/2017/11/estimating-optimal-learning-rate-deep-neural-network.html

    Args:
        data: The training data directory.
        spklist: A file mapping speaker names to indices.
        tune_period: How many steps to run at each learning rate.
        aux_data: The auxiliary data directory.
    """
    # Initialize all variables.
    self.sess.run(tf.global_variables_initializer())

    # We may need to load an existing model, since we sometimes look for the
    # learning rate to use for fine-tuning.
    if os.path.isfile(os.path.join(self.model, "checkpoint")):
        self.load()

    data_loader = KaldiDataRandomQueue(data, spklist,
                                       num_parallel=self.params.num_parallel_datasets,
                                       max_qsize=self.params.max_queue_size,
                                       num_speakers=self.params.num_speakers_per_batch,
                                       num_segments=self.params.num_segments_per_speaker,
                                       min_len=self.params.min_segment_len,
                                       max_len=self.params.max_segment_len,
                                       shuffle=True)
    data_loader.start()

    # The learning rate normally varies from 1e-5 to 1. Some common values:
    #   factor = 1.15, tune_period = 200, tune_times = 100
    init_learning_rate = 1e-5
    factor = 1.15
    tune_times = 100

    fp_lr = open(os.path.join(self.model, "learning_rate_tuning"), "w")
    for step in range(tune_period * tune_times):
        # Increase the learning rate by `factor` every `tune_period` steps.
        lr = init_learning_rate * (factor ** (step // tune_period))
        try:
            if step % tune_period == 0:
                train_ops = [self.train_ops, self.train_op, self.train_summary]
                start_time = time.time()
                features, labels = data_loader.fetch()
                train_val = self.sess.run(train_ops, feed_dict={self.train_features: features,
                                                                self.train_labels: labels,
                                                                self.global_step: 0,
                                                                self.learning_rate: lr})
                end_time = time.time()
                tf.logging.info(
                    "Epoch: step: %2d, time: %.4f s/step, lr: %f, raw loss: %f, total loss: %f"
                    % (step, end_time - start_time, lr,
                       train_val[0]["raw_loss"], train_val[0]["loss"]))
                fp_lr.write("%d %f %f\n" % (step, lr, train_val[0]["loss"]))
                self.summary_writer.add_summary(train_val[-1], step)
            else:
                features, labels = data_loader.fetch()
                _ = self.sess.run(self.train_op, feed_dict={self.train_features: features,
                                                            self.train_labels: labels,
                                                            self.global_step: 0,
                                                            self.learning_rate: lr})
        except DataOutOfRange:
            tf.logging.info("Finished reading features.")
            break

    data_loader.stop()
    fp_lr.close()
    return
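# A minimal sketch (not part of the trainer) of how the "learning_rate_tuning"
# file written above could be inspected to choose a learning rate. Each line of
# the file holds "step lr loss". Following the article linked in the docstring,
# a reasonable pick is the lr where the (smoothed) loss drops fastest; the
# smoothing window and this helper's name are assumptions for illustration.
import numpy as np

def pick_learning_rate(tuning_file, window=3):
    """Return the learning rate with the steepest smoothed loss decrease."""
    lrs, losses = [], []
    with open(tuning_file) as f:
        for line in f:
            _, lr, loss = line.split()
            lrs.append(float(lr))
            losses.append(float(loss))
    # Moving-average smoothing, then the slope between consecutive points.
    smoothed = np.convolve(losses, np.ones(window) / window, mode="valid")
    slopes = np.diff(smoothed)
    # argmin finds the most negative slope, i.e. the fastest improvement.
    return lrs[int(np.argmin(slopes))]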
def train(self, data, spklist, learning_rate, aux_data=None):
    """Train the model.

    Args:
        data: The training data directory.
        spklist: A file mapping speaker names to indices.
        learning_rate: The learning rate passed in by the main program, so the
            main program can tune it according to validation accuracy or
            anything else.
        aux_data: The auxiliary data (may be useful in a child class).
    """
    # Initialize all variables.
    self.sess.run(tf.global_variables_initializer())

    # curr_step is the real step the training is at.
    curr_step = 0

    # Load the model if we have one.
    if os.path.isfile(os.path.join(self.model, "checkpoint")):
        curr_step = self.load()

    # The data loader.
    data_loader = KaldiDataRandomQueue(data, spklist,
                                       num_parallel=self.params.num_parallel_datasets,
                                       max_qsize=self.params.max_queue_size,
                                       num_speakers=self.params.num_speakers_per_batch,
                                       num_segments=self.params.num_segments_per_speaker,
                                       min_len=self.params.min_segment_len,
                                       max_len=self.params.max_segment_len,
                                       shuffle=True)
    data_loader.start()

    epoch = int(curr_step / self.params.num_steps_per_epoch)
    for step in range(curr_step % self.params.num_steps_per_epoch,
                      self.params.num_steps_per_epoch):
        try:
            if step % self.params.save_summary_steps == 0 or step % self.params.show_training_progress == 0:
                train_ops = [self.train_ops, self.train_op]
                if step % self.params.save_summary_steps == 0:
                    train_ops.append(self.train_summary)
                start_time = time.time()
                features, labels = data_loader.fetch()
                train_val = self.sess.run(train_ops, feed_dict={self.train_features: features,
                                                                self.train_labels: labels,
                                                                self.global_step: curr_step,
                                                                self.learning_rate: learning_rate})
                end_time = time.time()
                tf.logging.info(
                    "Epoch: [%2d] step: [%2d/%2d] time: %.4f s/step, raw loss: %f, total loss: %f"
                    % (epoch, step, self.params.num_steps_per_epoch,
                       end_time - start_time, train_val[0]["raw_loss"], train_val[0]["loss"]))
                if step % self.params.save_summary_steps == 0:
                    self.summary_writer.add_summary(train_val[-1], curr_step)
            else:
                # Only run the optimizer.
                features, labels = data_loader.fetch()
                _ = self.sess.run(self.train_op, feed_dict={self.train_features: features,
                                                            self.train_labels: labels,
                                                            self.global_step: curr_step,
                                                            self.learning_rate: learning_rate})

            if step % self.params.save_checkpoints_steps == 0 and curr_step != 0:
                self.save(curr_step)
            curr_step += 1
        except DataOutOfRange:
            tf.logging.info("Finished reading features.")
            break

    data_loader.stop()
    self.save(curr_step)
    return
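# A minimal sketch of the outer loop implied by the docstring above: the main
# program owns the learning-rate schedule and passes the value into `train`.
# The halve-on-plateau rule and the `valid` call returning a loss are
# assumptions for illustration, not this trainer's actual API.
def run_training(trainer, data, spklist, num_epochs=20, init_lr=0.01):
    lr = init_lr
    prev_loss = float("inf")
    for _ in range(num_epochs):
        trainer.train(data, spklist, lr)
        valid_loss = trainer.valid(data, spklist)  # hypothetical validation call
        if valid_loss >= prev_loss:
            lr /= 2.0  # halve the lr once validation stops improving
        prev_loss = valid_loss
    return lr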
def train(self, data, spklist, learning_rate, aux_data=None):
    """Train the model.

    Args:
        data: The training data directory.
        spklist: A file mapping speaker names to indices.
        learning_rate: The learning rate passed in by the main program, so the
            main program can tune it according to validation accuracy or
            anything else.
        aux_data: The auxiliary data (may be useful in a child class).
    """
    # Optional semi-orthogonal constraint on the factorized TDNN kernel,
    # kept here (commented out) for reference:
    #
    # graph = tf.get_default_graph()
    # kernel_six = graph.get_tensor_by_name('tdnn_svd6/tdnn6.5_dense/kernel:0')
    #
    # def get_semi_orthogonal(mat):
    #     # print(mat.shape)
    #     M = tf.transpose(mat)
    #     I = tf.Variable(np.identity(M.shape[0]), dtype=tf.float32)
    #     for _ in range(10):
    #         P = tf.matmul(M, M, transpose_b=True)
    #         alpha2 = tf.divide(tf.trace(tf.matmul(P, P, transpose_b=True)), tf.trace(P))
    #         M = M - (1 / (2.0 * alpha2)) * tf.matmul(tf.subtract(P, alpha2 * I), M)
    #     P = tf.matmul(M, M, transpose_b=True)
    #     alpha2 = tf.divide(tf.trace(tf.matmul(P, P, transpose_b=True)), tf.trace(P))
    #     M = M / alpha2
    #     return tf.transpose(M)
    #
    # semi = get_semi_orthogonal(kernel_six)
    # semi_op = tf.assign(kernel_six, semi)

    # Initialize all variables.
    self.sess.run(tf.global_variables_initializer())

    # curr_step is the real step the training is at.
    curr_step = 0

    # Load the model if we have one.
    if os.path.isfile(os.path.join(self.model, "checkpoint")):
        curr_step = self.load()

    # The data loader.
    data_loader = KaldiDataRandomQueue(data, spklist,
                                       num_parallel=self.params.num_parallel_datasets,
                                       max_qsize=self.params.max_queue_size,
                                       num_speakers=self.params.num_speakers_per_batch,
                                       num_segments=self.params.num_segments_per_speaker,
                                       min_len=self.params.min_segment_len,
                                       max_len=self.params.max_segment_len,
                                       shuffle=True)

    epoch = int(curr_step / self.params.num_steps_per_epoch)
    data_loader.start()
    for step in range(curr_step % self.params.num_steps_per_epoch,
                      self.params.num_steps_per_epoch):
        try:
            # if step % 4 == 0:
            #     # Apply the semi-orthogonal constraint.
            #     self.sess.run(semi_op)
            if step % self.params.save_summary_steps == 0 or step % self.params.show_training_progress == 0:
                train_ops = [self.train_ops, self.train_op]
                if step % self.params.save_summary_steps == 0:
                    train_ops.append(self.train_summary)
                start_time = time.time()
                features, labels = data_loader.fetch()
                train_val = self.sess.run(train_ops, feed_dict={self.train_features: features,
                                                                self.train_labels: labels,
                                                                self.global_step: curr_step,
                                                                self.learning_rate: learning_rate})
                end_time = time.time()
                tf.logging.info(
                    "Epoch: [%2d] step: [%2d/%2d] time: %.4f s/step, raw loss: %f, total loss: %f"
                    % (epoch, step, self.params.num_steps_per_epoch,
                       end_time - start_time, train_val[0]["raw_loss"], train_val[0]["loss"]))
                if step % self.params.save_summary_steps == 0:
                    self.summary_writer.add_summary(train_val[-1], curr_step)
            else:
                # Only run the optimizer.
                features, labels = data_loader.fetch()
                _ = self.sess.run(self.train_op, feed_dict={self.train_features: features,
                                                            self.train_labels: labels,
                                                            self.global_step: curr_step,
                                                            self.learning_rate: learning_rate})

            if step % self.params.save_checkpoints_steps == 0 and curr_step != 0:
                self.save(curr_step)
            curr_step += 1
        except DataOutOfRange:
            tf.logging.info("Finished reading features.")
            break

    data_loader.stop()
    self.save(curr_step)
    return
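# A NumPy transcription of the commented-out semi-orthogonal update above, to
# make the iteration easier to read outside the TF graph: with P = M M^T and
# alpha^2 = tr(P P^T) / tr(P), repeat M <- M - (1 / (2 alpha^2)) (P - alpha^2 I) M.
# The final rescaling by alpha^2 is kept verbatim from the commented code.
import numpy as np

def semi_orthogonal(mat, num_iters=10):
    """Push `mat` toward a semi-orthogonal matrix (M M^T close to a scaled identity)."""
    M = mat.T
    I = np.identity(M.shape[0], dtype=M.dtype)
    for _ in range(num_iters):
        P = M @ M.T
        alpha2 = np.trace(P @ P.T) / np.trace(P)
        M = M - (1.0 / (2.0 * alpha2)) * ((P - alpha2 * I) @ M)
    P = M @ M.T
    alpha2 = np.trace(P @ P.T) / np.trace(P)
    M = M / alpha2
    return M.T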