def build_model(self, lr, Epoch, loss='WGANLoss', optim='Adam', betas=(0.5, 0.999)):
    """Build the GAN's networks, loss, and optimizers.

    Args:
        lr: learning rate forwarded to utils.optim.
        Epoch: number of training epochs (stored on self.Epoch).
        loss: loss name forwarded to utils.Loss (default 'WGANLoss').
        optim: optimizer name forwarded to utils.optim (default 'Adam').
        betas: Adam beta coefficients.

    Side effects: sets self.device, self.Generator, self.Discriminator,
    self.criterion, self.optimG, self.optimD; resumes from whole-module
    checkpoints 'Generator.pkl' / 'Discriminator.pkl' in the current
    working directory when they exist.
    """
    self.Epoch = Epoch
    #### Build Model ####
    if torch.cuda.is_available():
        self.device = torch.device('cuda')
        self.Generator = Generator().cuda(self.device)
        self.Discriminator = Discriminator().cuda(self.device)
    else:
        self.device = torch.device('cpu')
        self.Generator = Generator()
        self.Discriminator = Discriminator()
    # Resume from checkpoints if present. map_location is the fix: without
    # it, a checkpoint saved on a GPU machine fails to load on a CPU-only
    # host (torch tries to deserialize onto the original CUDA device).
    # NOTE(review): these are whole-module pickles, which are fragile across
    # code changes — consider state_dict checkpoints.
    if 'Generator.pkl' in os.listdir():
        self.Generator = torch.load('Generator.pkl', map_location=self.device)
    if 'Discriminator.pkl' in os.listdir():
        self.Discriminator = torch.load('Discriminator.pkl', map_location=self.device)
    #### Build Loss Function ####
    self.criterion = utils.Loss(loss=loss)
    #### Build optimizing ####
    # Optimizers are built after the (possibly reloaded) networks so they
    # hold references to the parameters that will actually be trained.
    self.optimG = utils.optim(self.Generator, lr=lr, optim=optim, betas=betas)
    self.optimD = utils.optim(self.Discriminator, lr=lr, optim=optim, betas=betas)
def build_model(self, lr, EPOCH):
    """Build the VAE and its optimizer.

    Args:
        lr: learning rate forwarded to utils.optim.
        EPOCH: number of training epochs (stored on self.EPOCH).

    Side effects: sets self.device, self.VAE, self.optim, self.EPOCH;
    resumes from a whole-module checkpoint 'VAE.pkl' in the current
    working directory when it exists.
    """
    if torch.cuda.is_available():
        self.device = torch.device('cuda')
        self.VAE = Varational_AutoEncoder().cuda(device=self.device)
    else:
        self.device = torch.device('cpu')
        self.VAE = Varational_AutoEncoder()
    self.EPOCH = EPOCH
    # Fix: load the checkpoint BEFORE building the optimizer. The original
    # built the optimizer first and then rebound self.VAE to the loaded
    # model, leaving the optimizer updating the discarded fresh network.
    # map_location also lets GPU-saved checkpoints load on CPU-only hosts.
    if 'VAE.pkl' in os.listdir():
        self.VAE = torch.load('VAE.pkl', map_location=self.device)
    self.optim = utils.optim(self.VAE, lr=lr)
def train(self):
    """Train the GE2E speaker-verification model (TF1 graph mode).

    Builds the similarity/loss graph on top of self.creat_model(), then runs
    config.iteration training steps, logging to TensorBoard and saving a
    checkpoint every 1000 iterations under config.model_path.
    """
    # Embedding network is built elsewhere; this graph adds loss + training ops.
    embedded = self.creat_model()
    lr = tf.placeholder(dtype=tf.float32, name="learning_rate")  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # GE2E scaled-cosine similarity parameters (w * cos + b).
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
    sim_matrix = similarity(embedded, w, b)
    loss = loss_cal(sim_matrix, type=config.loss)
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # optimizer type is determined by configuration
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients w.r.t. loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    # Smaller gradient scale for w, b (first two variables, per GE2E paper).
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)
    # Check variables memory.
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
    print("total variables :", variable_count)
    tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(config.model_path, "Check_Point"), exist_ok=True)  # folder to save model
        os.makedirs(os.path.join(config.model_path, "logs"), exist_ok=True)  # folder to save log
        writer = tf.summary.FileWriter(os.path.join(config.model_path, "logs"), sess.graph)
        lr_factor = 1  # lr decay factor (halved every 1000 iterations below)
        loss_acc = 0  # accumulated loss (for running average of loss)
        for iter in range(config.iteration):
            # Run forward and backward propagation and update parameters.
            # NOTE(review): feeds self.fingerprint_input — presumably the input
            # placeholder created by creat_model(); confirm against that method.
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={self.fingerprint_input: random_batch(),
                                                       lr: config.lr * lr_factor})
            loss_acc += loss_cur  # accumulated loss for each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 1000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 1000 == 0:
                saver.save(sess, os.path.join(config.model_path, "./Check_Point/model.ckpt"),
                           global_step=iter // 1000)
                print("model is saved!")
def train(self, Identity = True):
    """Train the CycleGAN pair (GeneratorA/B, DiscriminatorA/B).

    Args:
        Identity: when True, adds an extra L1 term weighted by
            0.5 * self.cycleLambda on top of the adversarial + cycle losses.

    Requires build_model() and load_dataset() to have been called first;
    saves whole-module checkpoints every 100 batches.
    """
    #### Check build_model ####
    try:
        getattr(self, 'GeneratorA')
        getattr(self, 'GeneratorB')
        getattr(self, 'DiscriminatorA')
        getattr(self, 'DiscriminatorB')
        getattr(self, 'criterion')
        getattr(self, 'L1Loss')
    except:
        assert False, 'Not apply build_model'
    #### Check load_dataset ####
    try:
        getattr(self, 'dataloaderA')
        getattr(self, 'dataloaderB')
    except:
        assert False, 'Not apply load_dataset'
    # History buffers of previously generated images (classic CycleGAN trick
    # to stabilise the discriminators) — mutated in place by ImagePool.
    ImagePoolA = []
    ImagePoolB = []
    for epoch in range(self.EPOCH):
        if epoch < self.step:  # resume support: skip already-trained epochs
            continue
        if epoch >= 100:
            # NOTE(review): this linear decay value is computed but never
            # applied to any optimizer — confirm whether decay was intended.
            lr = 0.0002 - 0.0002*(epoch-100)/100
        for i, data in enumerate(zip(self.dataloaderA, self.dataloaderB)):
            real_A = data[0][0].to(self.device)  # images only; labels discarded
            real_B = data[1][0].to(self.device)
            # NOTE(review): optimizers are recreated on EVERY batch, which
            # resets Adam's moment estimates each step — likely unintended.
            self.optimG_A = utils.optim(self.GeneratorA, lr=self.lr, optim=self.optim, betas=self.betas)
            self.optimG_B = utils.optim(self.GeneratorB, lr=self.lr, optim=self.optim, betas=self.betas)
            self.optimD_A = utils.optim(self.DiscriminatorA, lr=self.lr, optim=self.optim, betas=self.betas)
            self.optimD_B = utils.optim(self.DiscriminatorB, lr=self.lr, optim=self.optim, betas=self.betas)
            # Forward passes: GeneratorA maps B->A, GeneratorB maps A->B.
            fake_A = self.GeneratorA(real_B)
            fake_B = self.GeneratorB(real_A)
            recon_B = self.GeneratorB(fake_A)  # B -> A -> B cycle
            recon_A = self.GeneratorA(fake_B)  # A -> B -> A cycle
            DA_real_A = self.DiscriminatorA(real_A)
            DB_real_B = self.DiscriminatorB(real_B)
            DA_fake_A = self.DiscriminatorA(fake_A)
            DB_fake_B = self.DiscriminatorB(fake_B)
            ### train Generator ###
            self.optimG_A.zero_grad()
            self.optimG_B.zero_grad()
            # Adversarial losses: generators try to make D output "real" (ones).
            lossG_A = self.criterion(DA_fake_A, torch.ones(DA_fake_A.shape).to(self.device))
            lossG_B = self.criterion(DB_fake_B, torch.ones(DB_fake_B.shape).to(self.device))
            cycleLoss = (self.L1Loss(recon_A, real_A) + self.L1Loss(recon_B, real_B)) * self.cycleLambda
            LossG = lossG_A + lossG_B + cycleLoss
            if Identity:
                # NOTE(review): standard CycleGAN identity loss compares
                # G_B(real_B) with real_B; this compares fake_B (=G_B(real_A))
                # with real_A instead — confirm this is intentional.
                LossG += 0.5 * self.cycleLambda * (self.L1Loss(fake_B, real_A) + self.L1Loss(fake_A, real_B))
            LossG.backward(retain_graph=True)
            self.optimG_A.step()
            self.optimG_B.step()
            ### train Discriminator ###
            # Swap current fakes with samples from the history pools; p_A/p_B
            # (pool indices, presumably) are unused here.
            fake_A, p_A = ImagePool(ImagePoolA, fake_A, device=self.device)
            fake_B, p_B = ImagePool(ImagePoolB, fake_B, device=self.device)
            DA_real_A = self.DiscriminatorA(real_A)
            DB_real_B = self.DiscriminatorB(real_B)
            self.optimD_A.zero_grad()
            self.optimD_B.zero_grad()
            lossA_real = self.criterion(DA_real_A, torch.ones(DA_real_A.shape).to(self.device))
            # NOTE(review): DA_fake_A / DB_fake_B are the PRE-generator-step
            # outputs on the pre-pool fakes (stale after optimG_*.step()), and
            # the zeros target below uses DB_real_B.shape — looks like a typo
            # for DB_fake_B.shape; harmless only if shapes match.
            lossA_fake = self.criterion(DA_fake_A, torch.zeros(DA_fake_A.shape).to(self.device))
            lossB_real = self.criterion(DB_real_B, torch.ones(DB_real_B.shape).to(self.device))
            lossB_fake = self.criterion(DB_fake_B, torch.zeros(DB_real_B.shape).to(self.device))
            (lossA_real + lossA_fake).backward(retain_graph=True)
            (lossB_real + lossB_fake).backward(retain_graph=True)
            self.optimD_A.step()
            self.optimD_B.step()
            utils.PresentationExperience(epoch, i, 100, G=LossG.item(),
                                         D_A=(lossA_real+lossA_fake).item(),
                                         D_B=(lossB_real+lossB_fake).item())
            if i % 100 == 99:
                # Periodic whole-module checkpoints in the CWD.
                torch.save(self.DiscriminatorA, 'DiscriminatorA.pkl')
                torch.save(self.DiscriminatorB, 'DiscriminatorB.pkl')
                torch.save(self.GeneratorA, 'GeneratorA.pkl')
                torch.save(self.GeneratorB, 'GeneratorB.pkl')
def train(path): tf.reset_default_graph() # reset graph # draw graph batch = tf.placeholder( shape=[None, config.N * config.M, 40], dtype=tf.float32) # input batch (time x batch x n_mel) lr = tf.placeholder(dtype=tf.float32) # learning rate global_step = tf.Variable(0, name='global_step', trainable=False) w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32)) b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32)) # embedding lstm (3-layer default) with tf.variable_scope("lstm"): lstm_cells = [ tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj) for i in range(config.num_layer) ] lstm = tf.contrib.rnn.MultiRNNCell( lstm_cells) # define lstm op and variables outputs, _ = tf.nn.dynamic_rnn( cell=lstm, inputs=batch, dtype=tf.float32, time_major=True) # for TI-VS must use dynamic rnn embedded = outputs[-1] # the last ouput is the embedded d-vector embedded = normalize(embedded) # normalize print("embedded size: ", embedded.shape) # loss sim_matrix = similarity(embedded, w, b) print("similarity matrix size: ", sim_matrix.shape) loss = loss_cal(sim_matrix, type=config.loss) # optimizer operation trainable_vars = tf.trainable_variables() # get variable list optimizer = optim( lr) # get optimizer (type is determined by configuration) grads, vars = zip(*optimizer.compute_gradients( loss)) # compute gradients of variables with respect to loss grads_clip, _ = tf.clip_by_global_norm(grads, 3.0) # l2 norm clipping by 3 grads_rescale = [0.01 * grad for grad in grads_clip[:2] ] + grads_clip[2:] # smaller gradient scale for w, b train_op = optimizer.apply_gradients( zip(grads_rescale, vars), global_step=global_step) # gradient update operation # check variables memory variable_count = np.sum( np.array([ np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars ])) print("total variables :", variable_count) # record loss loss_summary = tf.summary.scalar("loss", loss) merged = tf.summary.merge_all() saver = tf.train.Saver() 
# training session with tf.Session() as sess: tf.global_variables_initializer().run() if (os.path.exists(path)): print("Restore from {}".format( os.path.join(path, "Check_Point/model.ckpt-2"))) saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2") ) # restore variables from selected ckpt file else: os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True) # make folder to save model os.makedirs(os.path.join(path, "logs"), exist_ok=True) # make folder to save log writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph) epoch = 0 lr_factor = 1 # lr decay factor ( 1/2 per 10000 iteration) loss_acc = 0 # accumulated loss ( for running average of loss) for iter in range(config.iteration): # run forward and backward propagation and update parameters _, loss_cur, summary = sess.run([train_op, loss, merged], feed_dict={ batch: random_batch(), lr: config.lr * lr_factor }) loss_acc += loss_cur # accumulated loss for each 100 iteration if iter % 10 == 0: writer.add_summary(summary, iter) # write at tensorboard if (iter + 1) % 100 == 0: print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100)) loss_acc = 0 # reset accumulated loss if (iter + 1) % 3000 == 0: lr_factor /= 2 # lr decay print("learning rate is decayed! current lr : ", config.lr * lr_factor) if (iter + 1) % 2500 == 0: saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"), global_step=iter // 2500) print("model is saved!")
def train(self):
    """Run the adversarial training loop for the single-pair GAN.

    Requires build_model() (Generator, Discriminator, criterion, L1Loss and
    the lr/optim/betas hyper-parameters) and load_dataset() (dataloader) to
    have been called first. Saves whole-module checkpoints every 100 batches.
    """
    #### Check build_model ####
    try:
        getattr(self, 'Generator')
        getattr(self, 'Discriminator')
        getattr(self, 'criterion')
        getattr(self, 'L1Loss')
    except AttributeError:
        assert False, 'Not apply build_model'
    #### Check load_dataset ####
    try:
        getattr(self, 'dataloader')
    except AttributeError:
        assert False, 'Not apply load_dataset'
    # Fix: build the optimizers ONCE. The original recreated them on every
    # batch, which resets Adam's moment estimates each step.
    self.optimG = utils.optim(self.Generator, lr=self.lr, optim=self.optim, betas=self.betas)
    self.optimD = utils.optim(self.Discriminator, lr=self.lr, optim=self.optim, betas=self.betas)
    for epoch in range(self.EPOCH):
        if epoch < self.step:  # resume support: skip already-trained epochs
            continue
        if epoch >= 100:
            # NOTE(review): decay value is computed but never applied to the
            # optimizers (same as the sibling CycleGAN train()).
            lr = 0.0002 - 0.0002 * (epoch - 100) / 100
        # Fix: iterate the dataloader directly. zip(self.dataloader) wrapped
        # each batch in a 1-tuple, so data[0] was the (image, label) pair and
        # .to(self.device) failed.
        for i, data in enumerate(self.dataloader):
            real = data[0].to(self.device)  # images only; labels discarded
            # Fix: the original called self.GeneratorA/self.DiscriminatorA,
            # attributes this single-pair model never defines.
            fake = self.Generator(real)
            D_real = self.Discriminator(real)
            D_fake = self.Discriminator(fake)
            ### train Generator ###
            self.optimG.zero_grad()
            LossG = self.criterion(D_fake, torch.ones(D_fake.shape).to(self.device))
            LossG.backward(retain_graph=True)
            self.optimG.step()
            ### train Discriminator ###
            self.optimD.zero_grad()
            loss_real = self.criterion(D_real, torch.ones(D_real.shape).to(self.device))
            loss_fake = self.criterion(D_fake, torch.zeros(D_fake.shape).to(self.device))
            # Fix: original summed lossA_real/lossA_fake — names that do not
            # exist in this function (NameError at runtime).
            (loss_real + loss_fake).backward(retain_graph=True)
            self.optimD.step()
            utils.PresentationExperience(epoch, i, 100, G=LossG.item(),
                                         D=(loss_real + loss_fake).item())
            if i % 100 == 99:
                torch.save(self.Discriminator, 'Discriminator.pkl')
                torch.save(self.Generator, 'Generator.pkl')
def train(path):
    """Train the GE2E d-vector model (variant with 10000-step LR decay/save).

    Args:
        path: model directory; if it already exists, restores
            Check_Point/model.ckpt-2, otherwise creates Check_Point/ and logs/.
    """
    tf.reset_default_graph()  # reset graph
    # Draw graph: input batch is (time x batch x n_mel).
    batch = tf.placeholder(shape=[None, config.N*config.M, 40], dtype=tf.float32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # GE2E scaled-cosine similarity parameters (w * cos + b).
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
    # Embedding lstm (3-layer default).
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)
    # Loss.
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)
    # Optimizer operation.
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # optimizer type is determined by configuration
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients w.r.t. loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    # Smaller gradient scale for w, b (first two variables).
    grads_rescale = [0.01*grad for grad in grads_clip[:2]] + grads_clip[2:]
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)
    # Check variables memory.
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)
    # Record loss.
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    # Training session.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        if(os.path.exists(path)):
            # Restore variables from a fixed checkpoint (ckpt-2 hard-coded).
            print("Restore from {}".format(os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2"))
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save log
        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (1/2 per 10000 iterations)
        loss_acc = 0  # accumulated loss (for running average of loss)
        for iter in range(config.iteration):
            # Run forward and backward propagation and update parameters.
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={batch: random_batch(),
                                                       lr: config.lr*lr_factor})
            loss_acc += loss_cur  # accumulated loss for each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter+1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter+1), loss_acc/100))
                loss_acc = 0  # reset accumulated loss
            if (iter+1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr*lr_factor)
            if (iter+1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter//10000)
                print("model is saved!")
def train(path):
    """Train the GE2E d-vector model while recording per-iteration timings.

    Args:
        path: model directory; Check_Point/ and logs/ subfolders are created
            under it.

    In addition to the usual training loop, measures the wall-clock duration
    of every sess.run step and, after training, writes the per-iteration and
    average timings to a UTF-8 text file in the current working directory.
    """
    tf.reset_default_graph()  # reset graph
    # Draw graph: input batch is (time x batch x n_mel).
    batch = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # GE2E scaled-cosine similarity parameters (w * cos + b).
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
    # Embedding lstm (3-layer default).
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)
    # Loss.
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)
    # Optimizer operation.
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # optimizer type is determined by configuration
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients w.r.t. loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    # Smaller gradient scale for w, b (first two variables).
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]
    train_op = optimizer.apply_gradients(
        zip(grads_rescale, vars), global_step=global_step)
    # Check variables memory.
    variable_count = np.sum(
        np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
    print("total variables :", variable_count)
    # Record loss.
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    # Training session; allow_growth avoids grabbing all GPU memory up front.
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
        os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save log
        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (1/2 per 10000 iterations)
        loss_acc = 0  # accumulated loss (for running average of loss)
        train_times = []  # "begin_end_duration" string per iteration
        total_times = 0  # sum of per-iteration durations, for the average
        for iter in range(config.iteration):
            # Fix: time.clock() was removed in Python 3.8 (and measured CPU
            # time, not wall time, on Unix); perf_counter() is the monotonic
            # wall-clock replacement.
            begin_time = time.perf_counter()  # iteration start time
            # Run forward and backward propagation and update parameters.
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })
            end_time = time.perf_counter()  # iteration end time
            total_times += end_time - begin_time
            train_times.append(
                str(begin_time) + '_' + str(end_time) + '_' + str(end_time - begin_time))
            loss_acc += loss_cur  # accumulated loss for each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
                print("iter:{},耗时:{}s".format(iter, str(end_time - begin_time)))
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 10000)
                print("model is saved!")
        # Save a final checkpoint after the loop.
        saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"),
                   global_step=iter)
        print("model is saved!")
        # Write the timing log (Chinese filename/headers kept as produced by
        # the original tooling).
        with open('GE2E_epoch说话人{}_batch说话人{}_人均音频数{}_iter{}_迭代耗时.txt'.format(
                config.spk_num, config.N, config.M, config.iteration),
                mode='w', encoding='utf-8') as wf:
            wf.write(
                "epoch说话人{}个;batch说话人:{}个;人均音频数:{}条;迭代总次数:{};平均每次训练迭代耗时:{}\n".
                format(config.spk_num, config.N, config.M, config.iteration,
                       total_times / config.iteration))
            wf.write("开始训练时间_结束训练时间_耗时\n")
            for line in train_times:
                wf.write(line + '\n')
def train(path): tf.reset_default_graph() # reset graph # draw graph batch = tf.placeholder( shape=[None, config.N * config.M, 40], dtype=tf.float32) # input batch (time x batch x n_mel) lr = tf.placeholder(dtype=tf.float32) # learning rate global_step = tf.Variable(0, name='global_step', trainable=False) w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32)) b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32)) # embedding lstm (3-layer default) with tf.variable_scope("lstm"): lstm_cells = [ tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj) for i in range(config.num_layer) ] lstm = tf.contrib.rnn.MultiRNNCell( lstm_cells) # define lstm op and variables outputs, _ = tf.nn.dynamic_rnn( cell=lstm, inputs=batch, dtype=tf.float32, time_major=True) # for TI-VS must use dynamic rnn embedded = outputs[-1] # the last ouput is the embedded d-vector embedded = normalize(embedded) # normalize print("embedded size: ", embedded.shape) # loss sim_matrix = similarity(embedded, w, b) print("similarity matrix size: ", sim_matrix.shape) loss = loss_cal(sim_matrix, type=config.loss) # optimizer operation trainable_vars = tf.trainable_variables() # get variable list optimizer = optim( lr) # get optimizer (type is determined by configuration) grads, vars = zip(*optimizer.compute_gradients( loss)) # compute gradients of variables with respect to loss grads_clip, _ = tf.clip_by_global_norm(grads, 3.0) # l2 norm clipping by 3 grads_rescale = [0.01 * grad for grad in grads_clip[:2] ] + grads_clip[2:] # smaller gradient scale for w, b train_op = optimizer.apply_gradients( zip(grads_rescale, vars), global_step=global_step) # gradient update operation # check variables memory variable_count = np.sum( np.array([ np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars ])) print("total variables :", variable_count) # record loss loss_summary = tf.summary.scalar("loss", loss) merged = tf.summary.merge_all() saver = tf.train.Saver() 
iter = 0 # training session with tf.Session() as sess: sess.run(tf.global_variables_initializer()) if config.restore: # Restore saved model if the user requested it, default = True try: ckpt = tf.train.latest_checkpoint( checkpoint_dir=os.path.join(path, "Check_Point")) # if (checkpoint_state and checkpoint_state.model_checkpoint_path): # print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path)) #saver = tf.train.import_meta_graph(os.path.join(path,"Check_Point/model.cpkt.meta")) #ckpt = tf.train.load_checkpoint(os.path.join(path,"Check_Point/model")) saver.restore(sess, ckpt) # else: # print('No model to load at {}'.format(save_dir)) # saver.save(sess, checkpoint_path, global_step=global_step) except: print('Cannot restore checkpoint exception') #if loaded == 0: # raise AssertionError("ckpt file does not exist! Check config.model_num or config.model_path.") #print("train file path : ", config.test_path) else: os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True) # make folder to save model os.makedirs(os.path.join(path, "logs"), exist_ok=True) # make folder to save log writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph) epoch = 0 lr_factor = 1 # lr decay factor ( 1/2 per 10000 iteration) loss_acc = 0 # accumulated loss ( for running average of loss) iter = 0 training_data_size = len(os.listdir(config.train_path)) print("train_size: ", training_data_size) prev_iter = -1 # while iter < config.iteration : while iter < config.iteration: prev_iter = iter # run forward and backward propagation and update parameters iter, _, loss_cur, summary = sess.run( [global_step, train_op, loss, merged], feed_dict={ batch: random_batch(), lr: config.lr * lr_factor }) loss_acc += loss_cur # accumulated loss for each 100 iteration if (iter - prev_iter > 1): epoch = config.N * (iter + 1) // training_data_size #lr_factor = lr_factor / (2**(epoch//100)) lr_factor = lr_factor / (2**(iter // 10000)) print("restored epoch:", epoch) 
print("restored learning rate:", lr_factor * config.lr) #if iter % 1000 == 0: # writer.add_summary(summary, iter) # write at tensorboard if (iter + 1) % 100 == 0: print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100)) loss_acc = 0 # reset accumulated loss #if config.N * (iter+1) % training_data_size == 0: # epoch = epoch + 1 # print("epoch: ", epoch) if (iter + 1) % 10000 == 0: lr_factor /= 2 print("learning rate is decayed! current lr : ", config.lr * lr_factor) #if ((config.N * (iter+1)) / training_data_size)%100 == 0: # lr_factor = lr_factor / 2 # print("learning factor: " , lr_factor) # print("learning rate is decayed! current lr : ", config.lr*lr_factor) if (iter + 1) % 5000 == 0: saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"), global_step=iter) #pooooooooooooint writer.add_summary(summary, iter) # write at tensorboard print("model is saved!")
def train(path, args):
    """Train a reference-encoder embedding, optionally as a classifier.

    Args:
        path: output root; checkpoints/<timestamp> and logs/<timestamp> are
            created under it.
        args: run options — model_type ('emt'/'accent'/'spk'), N, M,
            discriminator (classifier head + cross-entropy vs. GE2E loss),
            restore, train_filename, time_string.

    Validates every 20 steps on held-out batches and applies a step-count
    based LR schedule that differs per model_type.
    """
    tf.reset_default_graph()  # reset graph
    timestamp = time_string() if args.time_string == None else args.time_string
    # Draw graph.
    feeder = Feeder(args.train_filename, args, hparams)
    # Number of output classes comes from the label set the feeder exposes.
    output_classes = max([int(f) for f in feeder.total_emt]) + 1 if args.model_type in [
        'emt', 'accent'
    ] else max([int(f) for f in feeder.total_spk]) + 1
    batch = tf.placeholder(
        shape=[args.N * args.M, None, config.n_mels],
        dtype=tf.float32)  # input batch (time x batch x n_mel)
    labels = tf.placeholder(shape=[args.N * args.M], dtype=tf.int32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # GE2E scaled-cosine similarity parameters (w * cos + b).
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
    print("Training {} Discriminator Model".format(args.model_type))
    # Scope name is chosen so the weights can later be reused by the
    # Tacotron model's pretrained reference encoder.
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters, kernel_size=(3, 3),
        strides=(2, 2), is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(args.model_type),
        depth=hparams.reference_depth)  # [N, 128])
    embedded = encoder(batch)
    embedded = normalize(embedded)
    if args.discriminator:
        # Classifier head: softmax cross-entropy over output_classes.
        logit = tf.layers.dense(
            embedded, output_classes,
            name='Tacotron_model/inference/pretrained_ref_enc_{}_dense'.format(args.model_type))
        labels_one_hot = tf.one_hot(tf.to_int32(labels), output_classes)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logit,
                                                    labels=labels_one_hot))
        acc, acc_op = tf.metrics.accuracy(labels=tf.argmax(labels_one_hot, 1),
                                          predictions=tf.argmax(logit, 1))
        val_acc, val_acc_op = tf.metrics.accuracy(
            labels=tf.argmax(labels_one_hot, 1),
            predictions=tf.argmax(logit, 1))
    else:
        # GE2E loss on the similarity matrix.
        # NOTE(review): acc_op is only defined in the discriminator branch but
        # is fetched unconditionally in sess.run below — the non-discriminator
        # path will raise NameError; confirm intended usage.
        sim_matrix = similarity(embedded, w, b, args.N, args.M,
                                P=hparams.reference_depth)
        print("similarity matrix size: ", sim_matrix.shape)
        loss = loss_cal(sim_matrix, args.N, args.M, type=config.loss)
        val_acc_op = tf.constant(1.)  # placeholder metric for this branch
    # Optimizer operation.
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # optimizer type is determined by configuration
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients w.r.t. loss
    if args.discriminator:
        grads_rescale = grads  # no clipping/rescaling for the classifier
    else:
        grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
        # Smaller gradient scale for w, b (first two variables).
        grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]
    train_op = optimizer.apply_gradients(
        zip(grads_rescale, vars), global_step=global_step)
    # Check variables memory.
    variable_count = np.sum(
        np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
    print("total variables :", variable_count)
    # Record loss.
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=20)
    # Running-average windows for console reporting.
    loss_window = ValueWindow(100)
    acc_window = ValueWindow(100)
    val_loss_window = ValueWindow(5)
    val_acc_window = ValueWindow(5)
    # Training session.
    with tf.Session() as sess:
        tf.local_variables_initializer().run()  # tf.metrics accumulators are local vars
        tf.global_variables_initializer().run()
        checkpoint_folder = os.path.join(path, "checkpoints", timestamp)
        logs_folder = os.path.join(path, "logs", timestamp)
        os.makedirs(checkpoint_folder, exist_ok=True)  # folder to save model
        os.makedirs(logs_folder, exist_ok=True)  # folder to save log
        model_name = '{}_disc_model.ckpt'.format(args.model_type)
        checkpoint_path = os.path.join(checkpoint_folder, model_name)
        if args.restore:
            checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
            if (checkpoint_state and checkpoint_state.model_checkpoint_path):
                print('Loading checkpoint {}'.format(
                    checkpoint_state.model_checkpoint_path))
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
            else:
                print('No model to load at {}'.format(checkpoint_folder))
                saver.save(sess, checkpoint_path, global_step=global_step)
        else:
            print('Starting new training!')
            saver.save(sess, checkpoint_path, global_step=global_step)
        writer = tf.summary.FileWriter(logs_folder, sess.graph)
        lr_factor = 1  # lr decay factor (schedule applied per model_type below)
        iterations = 30000 if args.model_type == 'emt' else config.iteration
        for iter in range(iterations):
            if args.discriminator:
                batch_iter, _, labels_iter = feeder.random_batch_disc()
            else:
                batch_iter, _, labels_iter = feeder.random_batch()
            # Run forward and backward propagation and update parameters;
            # step is the (possibly restored) global_step.
            step, _, loss_cur, summary, acc_cur = sess.run(
                [global_step, train_op, loss, merged, acc_op],
                feed_dict={
                    batch: batch_iter,
                    labels: labels_iter,
                    lr: config.lr * lr_factor
                })
            loss_window.append(loss_cur)
            acc_window.append(acc_cur)
            if step % 10 == 0:
                writer.add_summary(summary, step)  # write at tensorboard
            if (step + 1) % 20 == 0:
                # Validation: average loss/accuracy over VAL_ITERS held-out batches.
                val_loss_cur_batch = 0
                val_acc_cur_batch = 0
                # NOTE(review): this inner loop reuses the name `iter`,
                # shadowing the outer loop variable — confusing, though the
                # outer for-loop's own progression is unaffected.
                for iter in range(VAL_ITERS):
                    if args.discriminator:
                        batch_iter, _, labels_iter = feeder.random_batch_disc(TEST=True)
                    else:
                        batch_iter, _, labels_iter = feeder.random_batch(TEST=True)
                    val_loss_cur, val_acc_cur = sess.run([loss, val_acc_op],
                                                         feed_dict={
                                                             batch: batch_iter,
                                                             labels: labels_iter
                                                         })
                    val_loss_cur_batch += val_loss_cur
                    val_acc_cur_batch += val_acc_cur
                val_loss_cur_batch /= VAL_ITERS
                val_acc_cur_batch /= VAL_ITERS
                val_loss_window.append(val_loss_cur_batch)
                val_acc_window.append(val_acc_cur_batch)
                message = "(iter : %d) loss: %.4f" % ((step + 1), loss_window.average)
                if args.discriminator:
                    message += ', acc: {:.2f}%'.format(acc_window.average)
                message += ", val_loss: %.4f" % (val_loss_window.average)
                if args.discriminator:
                    message += ', val_acc: {:.2f}%'.format(val_acc_window.average)
                print(message)
            # Step-count based LR schedule; thresholds differ per model_type.
            lr_changed = False
            if args.model_type == 'emt':
                if step > 6000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 4000:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            elif args.model_type == 'spk':
                if step > 300:  # originally 4000
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 180:  # originally 2500
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            if step % config.save_checkpoint_iters == 0:
                saver.save(sess, checkpoint_path, global_step=global_step)
def _compute_eer(S):
    """Return (EER, threshold, FAR, FRR) for a similarity tensor S.

    S has shape [N, M, N]: S[i, j, k] is the similarity of the j-th
    validation utterance of speaker i against speaker k's centroid.
    Sweeps thresholds in [0.5, 1.0) with step 0.01 and keeps the one
    where FAR and FRR are closest (their mean is the reported EER).
    """
    diff = 1
    EER = 0
    EER_thres = 0
    EER_FAR = 0
    EER_FRR = 0
    # through thresholds calculate false acceptance ratio (FAR) and false reject ratio (FRR)
    for thres in [0.01 * i + 0.5 for i in range(50)]:
        S_thres = S > thres
        # False acceptance ratio = false acceptance / mismatched population
        # (enroll speaker != validation speaker)
        FAR = sum([
            np.sum(S_thres[i]) - np.sum(S_thres[i, :, i])
            for i in range(config.N)
        ]) / (config.N - 1) / config.M / config.N
        # False reject ratio = false reject / matched population
        # (enroll speaker = validation speaker)
        FRR = sum([
            config.M - np.sum(S_thres[i][:, i]) for i in range(config.N)
        ]) / config.M / config.N
        # Save threshold when FAR = FRR (=EER)
        if diff > abs(FAR - FRR):
            diff = abs(FAR - FRR)
            EER = (FAR + FRR) / 2
            EER_thres = thres
            EER_FAR = FAR
            EER_FRR = FRR
    return EER, EER_thres, EER_FAR, EER_FRR


def train(path):
    """Train the GE2E speaker-verification LSTM, validating every 100 steps.

    path: model directory. Checkpoints go to <path>/Check_Point, TensorBoard
    logs to <path>/logs/run_XX, and plots plus a CSV-style .txt log under
    ./Plots/<run-name>/, where run-name is path[11:] (presumably strips a
    './tisv_model'-style prefix — TODO confirm against the caller).

    Fixes vs. the original:
    - the validation graph is built ONCE, before the session; the original
      re-created placeholders, a second LSTM stack and the similarity op
      inside the loop every 100 iterations, growing the graph without bound
    - run-number discovery lists the log dir derived from `path` instead of
      the hard-coded './tisv_model/logs'
    """
    tf.reset_default_graph()  # reset graph

    # ---------------- training graph ----------------
    train_batch = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # learnable scale/offset of the GE2E similarity (init 10 / -5)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # Embedding LSTM (3-layer default)
    with tf.variable_scope("lstm", reuse=None):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for _ in range(config.num_layer)
        ]
        print(config.num_layer)
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=train_batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last ouput is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # Define loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # Optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, variables = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    # w and b are the first two trainables: give them a smaller gradient scale
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]
    train_op = optimizer.apply_gradients(
        zip(grads_rescale, variables),
        global_step=global_step)  # gradient update operation

    # Check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list()))
            for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # ---------------- validation graph (built once, reuses "lstm" vars) ----
    enroll = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)  # enrollment batch (time x batch x n_mel)
    valid = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)  # validation batch (time x batch x n_mel)
    val_batch = tf.concat([enroll, valid], axis=1)
    with tf.variable_scope("lstm", reuse=tf.AUTO_REUSE):
        val_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for _ in range(config.num_layer)
        ]
        val_lstm = tf.contrib.rnn.MultiRNNCell(val_cells)
        val_outputs, _ = tf.nn.dynamic_rnn(
            cell=val_lstm, inputs=val_batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        val_embedded = normalize(val_outputs[-1])  # last output, normalized
    # enrollment embedded vectors (per-speaker centroid over M utterances)
    enroll_embed = normalize(
        tf.reduce_mean(tf.reshape(val_embedded[:config.N * config.M, :],
                                  shape=[config.N, config.M, -1]),
                       axis=1))
    # validation embedded vectors
    valid_embed = val_embedded[config.N * config.M:, :]
    similarity_matrix = similarity(embedded=valid_embed,
                                   w=1.,
                                   b=0.,
                                   center=enroll_embed)

    # TensorBoard vars declaration
    lr_summ = tf.summary.scalar(name='My_LR', tensor=lr)
    loss_summary = tf.summary.scalar("loss_ORIG", loss)
    w_summary = tf.summary.histogram('My_Weights', w)
    b_summary = tf.summary.histogram('My_Bias', b)
    merged = tf.summary.merge_all()  # merge all TB vars into one
    saver = tf.train.Saver(
        max_to_keep=40
    )  # create a saver, max_to_keep=40 w/ every 2500 steps = around 100000

    # ---------------- training session ----------------
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"),
                    exist_ok=True)  # make folder to save model
        os.makedirs(os.path.join(path, "logs"),
                    exist_ok=True)  # make folder to save TensorBoard logs
        os.makedirs("./Plots/",
                    exist_ok=True)  # folder for all plots and .txt logs
        run_name = path[11:]  # presumably strips './tisv_model' — TODO confirm
        os.makedirs("./Plots/" + run_name,
                    exist_ok=True)  # makes the subdirs for individual plots
        log_path = ("./Plots/" + run_name + "/" + run_name +
                    ".txt")  # .txt log file naming convention

        # Make per-run folders for TensorBoard visualization.
        logspath = os.path.join(path, "logs")
        # BUG FIX: list the log dir derived from `path`, not the hard-coded
        # './tisv_model/logs' the original used.
        num_previous_runs = os.listdir(logspath)
        if len(num_previous_runs) == 0:
            run_number = 1
        else:
            run_number = max(
                [int(s.split('run_')[1]) for s in num_previous_runs]) + 1
        curr_logdir = 'run_%02d' % run_number
        writer = tf.summary.FileWriter(
            os.path.join(logspath, curr_logdir),
            sess.graph)  # define writer for TensorBoard

        lr_factor = 1  # LR decay factor (1/2 per 5000 iterations)
        loss_acc = 0  # accumulated loss (for running average of loss)
        EER_list = []  # EER every 100 steps, for plotting
        train_loss_list = []  # mean training loss every 100 steps

        for step in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run(
                [train_op, loss, merged],
                feed_dict={
                    train_batch: random_batch(),
                    lr: config.lr * lr_factor
                })
            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            # write train_loss to TensorBoard
            if step % 10 == 0:
                writer.add_summary(summary, step)

            # perform validation
            if (step + 1) % 100 == 0:
                time1 = time.time()  # for check inference time
                S = sess.run(
                    similarity_matrix,
                    feed_dict={
                        enroll: random_batch(shuffle=False,
                                             forceValidation=True),
                        valid: random_batch(shuffle=False,
                                            utter_start=config.M,
                                            forceValidation=True)
                    })
                S = S.reshape([config.N, config.M, -1])
                time2 = time.time()
                np.set_printoptions(precision=4)

                EER, EER_thres, EER_FAR, EER_FRR = _compute_eer(S)
                print(
                    "\n(iter : %d) loss: %.4f || EER : %0.4f (thres:%0.4f, FAR:%0.4f, FRR:%0.4f) || inference time for %d utterences: %0.2fs"
                    % ((step + 1), loss_acc / 100, EER, EER_thres, EER_FAR,
                       EER_FRR, 2 * config.M * config.N, time2 - time1))
                EER_list.append(EER)
                train_loss_list.append(loss_acc / 100)

                # save figures
                if (step + 1) % 500 == 0:
                    plt.ioff()
                    fig_EER = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list, EER_list, label="EER")
                    plt.xlabel("Steps")
                    plt.ylabel("EER")
                    plt.title("Equal error rate progress")
                    plt.grid(True)
                    plot_path = ("./Plots/" + run_name + "/" + run_name +
                                 ".png")
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_EER)

                    plt.ioff()
                    fig_LOSS = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list,
                             train_loss_list,
                             color="orange",
                             label="train_loss")
                    plt.xlabel("Steps")
                    plt.ylabel("Training loss")
                    plt.title("Training progress")
                    plt.grid(True)
                    plot_path = ("./Plots/" + run_name + "/" + run_name +
                                 "_LOSS.png")
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_LOSS)

                # Every 100 iterations, append a log line of training progress
                with open(log_path, "a") as file:
                    file.write(
                        str(step + 1) + "," + str(loss_acc / 100) + "," +
                        str(EER) + "," + str(EER_thres) + "," + str(EER_FAR) +
                        "," + str(EER_FRR) + "\n")
                loss_acc = 0  # reset accumulated loss

            # decay learning rate
            if (step + 1) % 5000 == 0:
                lr_factor /= 2  # lr decay
                print("Learning Rate (LR) decayed! Current LR: ",
                      config.lr * lr_factor)

            # save model checkpoint
            if (step + 1) % 5000 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=step // 5000)  # naming val
                print("Model checkpoint saved!")