# Shared imports assumed by the snippets in this section. config and the repo
# helpers (similarity, loss_cal, optim, normalize, random_batch, embedd2center)
# come from the surrounding project; the later variants also use the repo's
# Feeder, ReferenceEncoder, ValueWindow, hparams and time_string.
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf


def train(self):
    embedded = self.creat_model()
    lr = tf.placeholder(dtype=tf.float32, name="learning_rate")  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
    sim_matrix = similarity(embedded, w, b)
    loss = loss_cal(sim_matrix, type=config.loss)

    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(config.model_path, "Check_Point"), exist_ok=True)  # folder to save model
        os.makedirs(os.path.join(config.model_path, "logs"), exist_ok=True)  # folder to save logs
        writer = tf.summary.FileWriter(os.path.join(config.model_path, "logs"), sess.graph)
        lr_factor = 1  # lr decay factor (halved every 1000 iterations below)
        loss_acc = 0  # accumulated loss (for a running average of the loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run(
                [train_op, loss, merged],
                feed_dict={self.fingerprint_input: random_batch(),
                           lr: config.lr * lr_factor})

            loss_acc += loss_cur  # accumulate loss over each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write to TensorBoard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 1000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 1000 == 0:
                saver.save(sess, os.path.join(config.model_path, "Check_Point/model.ckpt"),
                           global_step=iter // 1000)
                print("model is saved!")
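# The variants in this section all call similarity() and loss_cal() from the
# repo. For orientation, here is a minimal NumPy sketch of what the GE2E
# similarity matrix and softmax loss compute (Wan et al., 2018). The names
# mirror the repo's helpers, but these bodies are illustrative assumptions,
# not the repo's implementation; in particular, the paper also excludes each
# utterance from its own speaker's centroid for the positive term, which is
# omitted here for brevity.
def ge2e_similarity_np(embedded, w, b, N, M):
    # embedded: (N*M, P) L2-normalized d-vectors in speaker-major order
    e = embedded.reshape(N, M, -1)
    centroids = e.mean(axis=1)                                   # (N, P)
    centroids /= np.linalg.norm(centroids, axis=1, keepdims=True)
    return w * (embedded @ centroids.T) + b                      # (N*M, N)


def ge2e_softmax_loss_np(sim, N, M):
    # softmax variant: -S(ji, j) + logsumexp_k S(ji, k), summed over rows
    target = np.repeat(np.arange(N), M)       # true speaker index per row
    logsumexp = np.log(np.exp(sim).sum(axis=1))
    return float(np.sum(-sim[np.arange(N * M), target] + logsumexp))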
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N * config.M, 40],
                           dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        if os.path.exists(path):
            print("Restore from {}".format(os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2"))  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save logs

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (halved every 3000 iterations below)
        loss_acc = 0  # accumulated loss (for a running average of the loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={batch: random_batch(),
                                                       lr: config.lr * lr_factor})

            loss_acc += loss_cur  # accumulate loss over each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write to TensorBoard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 3000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 2500 == 0:
                saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter // 2500)
                print("model is saved!")
import time

if __name__ == '__main__':
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.1
    sess = tf.Session(config=tf_config)

    N, M = 4, 5
    # embedding dimension must match the array fed below (was 3, which
    # conflicted with the (N*M*2, 128) feed)
    embed = tf.placeholder(dtype=tf.float32, shape=(N * 2 * M, 128))

    # new loss: split the batch into two halves and score each half against
    # the other half's centroids
    embed_1 = embed[:N * M]
    embed_2 = embed[N * M:]
    center_1 = embedd2center(embed_1, N, M)
    center_2 = embedd2center(embed_2, N, M)
    new_loss = loss_cal(similarity(embed_1, 1.0, 0.0, N, M, center_2), name='softmax', N=N, M=M) + \
               loss_cal(similarity(embed_2, 1.0, 0.0, N, M, center_1), name='softmax', N=N, M=M)

    # old loss
    old_loss = loss_cal(similarity(embed, 1.0, 0.0, N, M * 2), N=N, M=M * 2)

    sess.run(tf.global_variables_initializer())
    arr = np.random.rand(N * M * 2, 128)

    times = []
    print('Calculating old loss')
    x = sess.run(old_loss, feed_dict={embed: arr})
    print(x)

    times = []
    print('Calculating new loss')
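# embedd2center() is referenced above but not shown. A plausible TF1 sketch of
# its contract, assuming speaker-major (N*M, P) input; the repo's actual
# implementation may differ.
def embedd2center_sketch(embedded, N, M):
    # average each speaker's M utterance embeddings into one centroid,
    # then re-normalize: (N*M, P) -> (N, P)
    centers = tf.reduce_mean(tf.reshape(embedded, [N, M, -1]), axis=1)
    return tf.nn.l2_normalize(centers, axis=1)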
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N * config.M, 40],
                           dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    # with tf.Session() as sess:
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
        os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save logs
        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (1/2 per 10000 iterations)
        loss_acc = 0  # accumulated loss (for a running average of the loss)
        train_times = []  # ===== 2020/05/20 16:30: per-iteration timings
        total_times = 0   # ===== 2020/05/20 16:30: total training time

        for iter in range(config.iteration):
            # record iteration start time (time.clock() was removed in
            # Python 3.8; perf_counter() measures wall time)
            begin_time = time.perf_counter()  # ===== 2020/05/20 16:30

            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={batch: random_batch(),
                                                       lr: config.lr * lr_factor})

            # record iteration end time
            end_time = time.perf_counter()  # ===== 2020/05/20 16:30
            total_times += end_time - begin_time  # ===== 2020/05/20 16:30
            train_times.append(str(begin_time) + '_' + str(end_time) + '_' +
                               str(end_time - begin_time))  # ===== 2020/05/20 16:30

            loss_acc += loss_cur  # accumulate loss over each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write to TensorBoard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
                print("iter:{}, elapsed: {}s".format(iter, str(end_time - begin_time)))  # ===== 2020/05/20 16:30
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter // 10000)
                print("model is saved!")

        # ===== 2020/05/20 16:30: save the final model
        saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"), global_step=iter)
        print("model is saved!")

        # write the timings to a file
        with open('GE2E_epoch_spk{}_batch_spk{}_utt_per_spk{}_iter{}_iter_times.txt'.format(
                config.spk_num, config.N, config.M, config.iteration),
                mode='w', encoding='utf-8') as wf:
            wf.write("epoch speakers: {}; batch speakers: {}; utterances per speaker: {}; "
                     "total iterations: {}; average time per training iteration: {}\n".format(
                         config.spk_num, config.N, config.M, config.iteration,
                         total_times / config.iteration))
            wf.write("start_time_end_time_elapsed\n")
            for line in train_times:
                wf.write(line + '\n')
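# The begin/end bookkeeping above can be wrapped in a small helper; a sketch
# (step_timer is a hypothetical name, not part of the repo):
from contextlib import contextmanager

@contextmanager
def step_timer(times_list):
    # appends "begin_end_elapsed" strings, matching the format written to
    # the timing file above
    begin = time.perf_counter()
    yield
    end = time.perf_counter()
    times_list.append('{}_{}_{}'.format(begin, end, end - begin))

# usage inside the training loop:
#     with step_timer(train_times):
#         sess.run([train_op, loss, merged], feed_dict=...)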
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N * config.M, 40],
                           dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        if os.path.exists(path):
            print("Restore from {}".format(os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2"))  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save logs

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (1/2 per 10000 iterations)
        loss_acc = 0  # accumulated loss (for a running average of the loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={batch: random_batch(),
                                                       lr: config.lr * lr_factor})

            loss_acc += loss_cur  # accumulate loss over each 100 iterations
            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write to TensorBoard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter // 10000)
                print("model is saved!")
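# Every variant here feeds random_batch() into a [time, N*M, 40] placeholder.
# A sketch of the shape contract that function has to satisfy; the body below
# just fabricates correctly shaped data, whereas the real implementation
# samples M utterances for each of N speakers from config.train_path.
def random_batch_sketch(frames=160):
    # (time, N*M, 40) float32 log-mel features, speaker-major along axis 1
    return np.random.randn(frames, config.N * config.M, 40).astype(np.float32)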
def test(path):
    tf.reset_default_graph()

    # draw graph
    enroll = tf.placeholder(shape=[None, config.N * config.M, 40],
                            dtype=tf.float32)  # enrollment batch (time x batch x n_mel)
    verif = tf.placeholder(shape=[None, config.N * config.M, 40],
                           dtype=tf.float32)  # verification batch (time x batch x n_mel)
    batch = tf.concat([enroll, verif], axis=1)

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # make lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # enrollment embedded vectors (speaker model)
    enroll_embed = normalize(
        tf.reduce_mean(tf.reshape(embedded[:config.N * config.M, :],
                                  shape=[config.N, config.M, -1]), axis=1))
    # verification embedded vectors
    verif_embed = embedded[config.N * config.M:, :]

    similarity_matrix = similarity(embedded=verif_embed, w=1., b=0., center=enroll_embed)
    loss = loss_cal(similarity_matrix, type=config.loss)

    saver = tf.train.Saver(var_list=tf.global_variables())

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # load model: sweep the saved checkpoints and keep the one with the lowest loss
        # ckpt = tf.train.get_checkpoint_state(path)
        # checkpoints = ckpt.all_model_checkpoint_paths
        # print("checkpoints : ", checkpoints)
        i = 139999
        least_loss = 99999
        while i < 399999:
            saver.restore(sess, os.path.join(path, "model.ckpt-" + str(i)))
            S, L = sess.run([similarity_matrix, loss],
                            feed_dict={enroll: random_batch(shuffle=False),
                                       verif: random_batch(shuffle=False, utter_start=config.M)})
            S = S.reshape([config.N, config.M, -1])
            print("test file path : ", config.test_path)
            np.set_printoptions(precision=2)
            # print(S)

            if L < least_loss:
                # diff = abs(FAR - FRR)
                perfect_step = i
                least_loss = L
            print(i)
            print(str(L / (config.N * config.M)))
            i = i + 2500

        print("\ncheckpoint: " + str(perfect_step) + " (loss:%0.2f)" % (least_loss))
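# The commented-out lines above hint at a cleaner alternative: let TensorFlow
# enumerate the saved checkpoints instead of hard-coding step numbers. A
# sketch, assuming the default "checkpoint" index file exists in path;
# best_checkpoint and eval_fn are hypothetical names, not repo code.
def best_checkpoint(sess, saver, path, eval_fn):
    # restore each saved checkpoint in turn and return the one that
    # minimizes eval_fn() (e.g. a closure running the loss op above)
    ckpt = tf.train.get_checkpoint_state(path)
    best, best_loss = None, float('inf')
    for ckpt_path in ckpt.all_model_checkpoint_paths:
        saver.restore(sess, ckpt_path)
        L = eval_fn()
        if L < best_loss:
            best, best_loss = ckpt_path, L
    return best, best_loss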
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N * config.M, 40],
                           dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    iter = 0

    # training session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if config.restore:  # restore a saved model if the user requested it (default = True)
            try:
                ckpt = tf.train.latest_checkpoint(checkpoint_dir=os.path.join(path, "Check_Point"))
                # if checkpoint_state and checkpoint_state.model_checkpoint_path:
                #     print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                # saver = tf.train.import_meta_graph(os.path.join(path, "Check_Point/model.cpkt.meta"))
                # ckpt = tf.train.load_checkpoint(os.path.join(path, "Check_Point/model"))
                saver.restore(sess, ckpt)
                # else:
                #     print('No model to load at {}'.format(save_dir))
                #     saver.save(sess, checkpoint_path, global_step=global_step)
            except Exception:
                print('Cannot restore checkpoint exception')
                # if loaded == 0:
                #     raise AssertionError("ckpt file does not exist! Check config.model_num or config.model_path.")
            # print("train file path : ", config.test_path)
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save logs

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (1/2 per 10000 iterations)
        loss_acc = 0  # accumulated loss (for a running average of the loss)
        iter = 0
        training_data_size = len(os.listdir(config.train_path))
        print("train_size: ", training_data_size)
        prev_iter = -1

        while iter < config.iteration:
            prev_iter = iter
            # run forward and backward propagation and update parameters
            iter, _, loss_cur, summary = sess.run([global_step, train_op, loss, merged],
                                                  feed_dict={batch: random_batch(),
                                                             lr: config.lr * lr_factor})

            loss_acc += loss_cur  # accumulate loss over each 100 iterations

            if iter - prev_iter > 1:
                # a restore jumped global_step forward: recover epoch and lr
                epoch = config.N * (iter + 1) // training_data_size
                # lr_factor = lr_factor / (2**(epoch // 100))
                lr_factor = lr_factor / (2 ** (iter // 10000))
                print("restored epoch:", epoch)
                print("restored learning rate:", lr_factor * config.lr)

            # if iter % 1000 == 0:
            #     writer.add_summary(summary, iter)  # write to TensorBoard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            # if config.N * (iter + 1) % training_data_size == 0:
            #     epoch = epoch + 1
            #     print("epoch: ", epoch)
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2
                print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            # if ((config.N * (iter + 1)) / training_data_size) % 100 == 0:
            #     lr_factor = lr_factor / 2
            #     print("learning factor: ", lr_factor)
            #     print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            if (iter + 1) % 5000 == 0:
                saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter)
                writer.add_summary(summary, iter)  # write to TensorBoard
                print("model is saved!")
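# The restore branch above works because the decay schedule is a pure function
# of the restored global_step: halve every 10000 steps. As a sketch
# (lr_factor_at is a hypothetical helper, not in the repo):
def lr_factor_at(step):
    # 1/2 per 10000 iterations, recoverable from global_step alone
    return 1.0 / (2 ** (step // 10000))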
def train(path, args):
    tf.reset_default_graph()  # reset graph
    timestamp = time_string() if args.time_string == None else args.time_string

    # draw graph
    feeder = Feeder(args.train_filename, args, hparams)
    output_classes = (max([int(f) for f in feeder.total_emt]) + 1
                      if args.model_type in ['emt', 'accent']
                      else max([int(f) for f in feeder.total_spk]) + 1)

    batch = tf.placeholder(shape=[args.N * args.M, None, config.n_mels],
                           dtype=tf.float32)  # input batch (batch x time x n_mel)
    labels = tf.placeholder(shape=[args.N * args.M], dtype=tf.int32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedded = triple_lstm(batch)
    print("Training {} Discriminator Model".format(args.model_type))
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters, kernel_size=(3, 3), strides=(2, 2),
        is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(args.model_type),
        depth=hparams.reference_depth)  # [N, 128]
    embedded = encoder(batch)
    embedded = normalize(embedded)

    if args.discriminator:
        logit = tf.layers.dense(
            embedded, output_classes,
            name='Tacotron_model/inference/pretrained_ref_enc_{}_dense'.format(args.model_type))
        labels_one_hot = tf.one_hot(tf.to_int32(labels), output_classes)
        # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_one_hot))
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_one_hot))
        acc, acc_op = tf.metrics.accuracy(labels=tf.argmax(labels_one_hot, 1),
                                          predictions=tf.argmax(logit, 1))
        val_acc, val_acc_op = tf.metrics.accuracy(labels=tf.argmax(labels_one_hot, 1),
                                                  predictions=tf.argmax(logit, 1))
    else:
        # loss
        sim_matrix = similarity(embedded, w, b, args.N, args.M, P=hparams.reference_depth)
        print("similarity matrix size: ", sim_matrix.shape)
        loss = loss_cal(sim_matrix, args.N, args.M, type=config.loss)
        val_acc_op = tf.constant(1.)
        acc_op = tf.constant(1.)  # placeholder so the train-step fetch list works in both modes

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    if args.discriminator:
        grads_rescale = grads
    else:
        grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
        grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=20)

    loss_window = ValueWindow(100)
    acc_window = ValueWindow(100)
    val_loss_window = ValueWindow(5)
    val_acc_window = ValueWindow(5)

    # training session
    with tf.Session() as sess:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        checkpoint_folder = os.path.join(path, "checkpoints", timestamp)
        logs_folder = os.path.join(path, "logs", timestamp)
        os.makedirs(checkpoint_folder, exist_ok=True)  # folder to save model
        os.makedirs(logs_folder, exist_ok=True)  # folder to save logs
        model_name = '{}_disc_model.ckpt'.format(args.model_type)
        checkpoint_path = os.path.join(checkpoint_folder, model_name)

        if args.restore:
            checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
            if checkpoint_state and checkpoint_state.model_checkpoint_path:
                print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
            else:
                print('No model to load at {}'.format(checkpoint_folder))
                saver.save(sess, checkpoint_path, global_step=global_step)
        else:
            print('Starting new training!')
            saver.save(sess, checkpoint_path, global_step=global_step)

        writer = tf.summary.FileWriter(logs_folder, sess.graph)
        lr_factor = 1  # lr decay factor (staged schedule below)
        iterations = 30000 if args.model_type == 'emt' else config.iteration

        for iter in range(iterations):
            if args.discriminator:
                batch_iter, _, labels_iter = feeder.random_batch_disc()
            else:
                batch_iter, _, labels_iter = feeder.random_batch()

            # run forward and backward propagation and update parameters
            step, _, loss_cur, summary, acc_cur = sess.run(
                [global_step, train_op, loss, merged, acc_op],
                feed_dict={batch: batch_iter, labels: labels_iter,
                           lr: config.lr * lr_factor})

            loss_window.append(loss_cur)
            acc_window.append(acc_cur)

            if step % 10 == 0:
                writer.add_summary(summary, step)  # write to TensorBoard

            if (step + 1) % 20 == 0:
                val_loss_cur_batch = 0
                val_acc_cur_batch = 0
                # the inner loop variable was renamed from iter to val_iter
                # so it no longer clobbers the outer loop counter
                for val_iter in range(VAL_ITERS):
                    if args.discriminator:
                        batch_iter, _, labels_iter = feeder.random_batch_disc(TEST=True)
                    else:
                        batch_iter, _, labels_iter = feeder.random_batch(TEST=True)
                    # run a validation forward pass (no parameter update)
                    val_loss_cur, val_acc_cur = sess.run([loss, val_acc_op],
                                                         feed_dict={batch: batch_iter,
                                                                    labels: labels_iter})
                    val_loss_cur_batch += val_loss_cur
                    val_acc_cur_batch += val_acc_cur
                val_loss_cur_batch /= VAL_ITERS
                val_acc_cur_batch /= VAL_ITERS
                val_loss_window.append(val_loss_cur_batch)
                val_acc_window.append(val_acc_cur_batch)

                message = "(iter : %d) loss: %.4f" % ((step + 1), loss_window.average)
                if args.discriminator:
                    message += ', acc: {:.2f}%'.format(acc_window.average)
                message += ", val_loss: %.4f" % (val_loss_window.average)
                if args.discriminator:
                    message += ', val_acc: {:.2f}%'.format(val_acc_window.average)
                print(message)

            lr_changed = False
            if args.model_type == 'emt':
                if step > 6000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 4000:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ", config.lr * lr_factor)
            elif args.model_type == 'spk':
                if step > 300:  # 4000
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 180:  # 2500
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ", config.lr * lr_factor)

            if step % config.save_checkpoint_iters == 0:
                saver.save(sess, checkpoint_path, global_step=global_step)
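# The staged decay above is easier to audit as a pure function of the step;
# an equivalent sketch (lr_factor_for is a hypothetical name, the thresholds
# are copied from the if/elif ladder above):
def lr_factor_for(step, model_type):
    if model_type == 'emt':
        return .01 if step > 6000 else (.1 if step > 4000 else 1.)
    if model_type == 'spk':
        return .01 if step > 300 else (.1 if step > 180 else 1.)
    return 1.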
def get_embeddings(path, args):
    tf.reset_default_graph()  # reset graph
    if args.time_string == None:
        raise ValueError('must provide valid time_string')

    emb_dir = os.path.join(path, 'embeddings')
    os.makedirs(emb_dir, exist_ok=True)
    meta_path = os.path.join(emb_dir, 'meta.tsv')
    emb_path = (os.path.join(emb_dir, 'emb_emt.tsv') if args.model_type == 'emt'
                else os.path.join(emb_dir, 'emb_spk.tsv'))

    # draw graph
    feeder = Feeder(args.train_filename, args, hparams)
    datasets = ['emt4', 'vctk'] if args.model_type == 'emt' else ['vctk']
    num_datasets = len(datasets)

    batch = tf.placeholder(shape=[num_datasets * args.N * args.M, None, config.n_mels],
                           dtype=tf.float32)  # input batch (batch x time x n_mel)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedded = triple_lstm(batch)
    print("{} Discriminator Model".format(args.model_type))
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters, kernel_size=(3, 3), strides=(2, 2),
        is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(args.model_type),
        depth=hparams.reference_depth)  # [N, 128]
    embedded = encoder(batch)

    # loss
    sim_matrix = similarity(embedded, w, b, num_datasets * args.N, args.M,
                            P=hparams.reference_depth)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, num_datasets * args.N, args.M, type=config.loss)

    saver = tf.train.Saver()

    # inference session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        checkpoint_folder = os.path.join(path, "checkpoints", args.time_string)
        checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
        if checkpoint_state and checkpoint_state.model_checkpoint_path:
            print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
            saver.restore(sess, checkpoint_state.model_checkpoint_path)
        else:
            raise ValueError('No model to load at {}'.format(checkpoint_folder))

        feeder_batch, meta = feeder.emb_batch(make_meta=True, datasets=datasets)
        emb, loss = sess.run([embedded, loss], feed_dict={batch: feeder_batch})
        print("loss: {:.4f}".format(loss))

        meta.to_csv(meta_path, sep='\t', index=False)
        pd.DataFrame(emb).to_csv(emb_path, sep='\t', index=False, header=False)
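# Usage note: the meta.tsv / emb_*.tsv pair written above matches the format
# TensorBoard's Embedding Projector accepts when loading data manually: the
# embedding file is header-less tab-separated floats (one vector per row) and
# meta.tsv carries the per-row labels for coloring and search.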
def train(path):
    tf.reset_default_graph()  # reset graph

    # Draw train graph
    train_batch = tf.placeholder(shape=[None, config.N * config.M, 40],
                                 dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # Embedding LSTM (3-layer default)
    with tf.variable_scope("lstm", reuse=None):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                      for i in range(config.num_layer)]
        print(config.num_layer)
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=train_batch, dtype=tf.float32,
                                       time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # Define loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # Optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2-norm clipping at 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                         global_step=global_step)  # gradient update operation

    # Check variable memory footprint
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list()))
                                      for v in trainable_vars]))
    print("total variables :", variable_count)

    # TensorBoard vars declaration
    lr_summ = tf.summary.scalar(name='My_LR', tensor=lr)
    loss_summary = tf.summary.scalar("loss_ORIG", loss)
    w_summary = tf.summary.histogram('My_Weights', w)
    b_summary = tf.summary.histogram('My_Bias', b)
    merged = tf.summary.merge_all()  # merge all TB vars into one

    saver = tf.train.Saver(max_to_keep=40)  # with a save every 5000 steps, 40 kept ckpts cover ~200000 iterations

    # Training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # folder to save model
        os.makedirs(os.path.join(path, "logs"), exist_ok=True)  # folder to save TensorBoard logs
        os.makedirs("./Plots/", exist_ok=True)  # folder to save all plots and .txt logs
        os.makedirs("./Plots/" + path[11:], exist_ok=True)  # subdirs for individual plots
        log_path = "./Plots/" + path[11:] + "/" + path[11:] + ".txt"  # .txt log file naming convention

        # Make one folder per run for TensorBoard visualization
        logspath = os.path.join(path, "logs")
        num_previous_runs = os.listdir('./tisv_model/logs')
        if len(num_previous_runs) == 0:
            run_number = 1
        else:
            run_number = max([int(s.split('run_')[1]) for s in num_previous_runs]) + 1
        curr_logdir = 'run_%02d' % run_number
        writer = tf.summary.FileWriter(os.path.join(logspath, curr_logdir),
                                       sess.graph)  # writer for TensorBoard
        # END of block

        # epoch = 0  # not used
        lr_factor = 1  # LR decay factor (1/2 per 5000 iterations below)
        loss_acc = 0  # accumulated loss (for calculating the average loss)

        # lists for figure creation
        EER_list = []  # collects the EER results every 100 steps for plotting
        train_loss_list = []  # collects the training loss every 100 steps for plotting
        # LR_decay_list = []  # not used

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={train_batch: random_batch(),
                                                       lr: config.lr * lr_factor})

            loss_acc += loss_cur  # accumulate loss over each 100 iterations

            # write train_loss to TensorBoard
            if iter % 10 == 0:
                writer.add_summary(summary, iter)

            # perform validation
            if (iter + 1) % 100 == 0:
                # print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                # print("==============VALIDATION START!============")

                # Draw validation graph. Note: building these ops inside the
                # loop adds new nodes to the graph on every validation pass;
                # hoisting this block above the loop would avoid the growth.
                enroll = tf.placeholder(shape=[None, config.N * config.M, 40],
                                        dtype=tf.float32)  # enrollment batch (time x batch x n_mel)
                valid = tf.placeholder(shape=[None, config.N * config.M, 40],
                                       dtype=tf.float32)  # validation batch (time x batch x n_mel)
                val_batch = tf.concat([enroll, valid], axis=1)

                # Embedding LSTM (3-layer default)
                with tf.variable_scope("lstm", reuse=tf.AUTO_REUSE):
                    lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                                  for i in range(config.num_layer)]
                    lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # make lstm op and variables
                    outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=val_batch, dtype=tf.float32,
                                                   time_major=True)  # for TI-VS must use dynamic rnn
                    embedded = outputs[-1]  # the last output is the embedded d-vector
                    embedded = normalize(embedded)  # normalize
                # print("embedded size: ", embedded.shape)

                # enrollment embedded vectors (speaker model)
                enroll_embed = normalize(
                    tf.reduce_mean(tf.reshape(embedded[:config.N * config.M, :],
                                              shape=[config.N, config.M, -1]), axis=1))
                # validation embedded vectors
                valid_embed = embedded[config.N * config.M:, :]

                similarity_matrix = similarity(embedded=valid_embed, w=1., b=0.,
                                               center=enroll_embed)
                # print("test file path : ", config.test_path)

                # Return similarity matrix (SM) after enrollment and validation
                time1 = time.time()  # to check inference time
                S = sess.run(similarity_matrix,
                             feed_dict={enroll: random_batch(shuffle=False, forceValidation=True),
                                        valid: random_batch(shuffle=False, utter_start=config.M,
                                                            forceValidation=True)})
                S = S.reshape([config.N, config.M, -1])
                time2 = time.time()

                np.set_printoptions(precision=4)
                # print("inference time for %d utterances : %0.2fs" % (2 * config.M * config.N, time2 - time1))
                # print(S)  # print similarity matrix

                # calculating EER
                diff = 1
                EER = 0
                EER_thres = 0
                EER_FAR = 0
                EER_FRR = 0

                # sweep thresholds to calculate false acceptance ratio (FAR) and false rejection ratio (FRR)
                for thres in [0.01 * i + 0.5 for i in range(50)]:
                    S_thres = S > thres

                    # False acceptance ratio = false acceptances / mismatched population (enroll speaker != validation speaker)
                    FAR = sum([np.sum(S_thres[i]) - np.sum(S_thres[i, :, i])
                               for i in range(config.N)]) / (config.N - 1) / config.M / config.N

                    # False rejection ratio = false rejections / matched population (enroll speaker = validation speaker)
                    FRR = sum([config.M - np.sum(S_thres[i][:, i])
                               for i in range(config.N)]) / config.M / config.N

                    # Save the threshold where FAR = FRR (= EER)
                    if diff > abs(FAR - FRR):
                        diff = abs(FAR - FRR)
                        EER = (FAR + FRR) / 2
                        EER_thres = thres
                        EER_FAR = FAR
                        EER_FRR = FRR

                print("\n(iter : %d) loss: %.4f || EER : %0.4f (thres:%0.4f, FAR:%0.4f, FRR:%0.4f) || inference time for %d utterances: %0.2fs"
                      % ((iter + 1), loss_acc / 100, EER, EER_thres, EER_FAR, EER_FRR,
                         2 * config.M * config.N, time2 - time1))
                EER_list.append(EER)
                # print("==============VALIDATION END!==============")
                train_loss_list.append(loss_acc / 100)

                # save figures
                if (iter + 1) % 500 == 0:
                    plt.ioff()
                    fig_EER = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list, EER_list, label="EER")
                    plt.xlabel("Steps")
                    plt.ylabel("EER")
                    plt.title("Equal error rate progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[11:] + ".png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_EER)

                    plt.ioff()
                    fig_LOSS = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list, train_loss_list, color="orange", label="train_loss")
                    plt.xlabel("Steps")
                    plt.ylabel("Training loss")
                    plt.title("Training progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[11:] + "_LOSS.png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_LOSS)

                # Every 100 iterations, append a log line of training progress
                with open(log_path, "a") as file:
                    file.write(str(iter + 1) + "," + str(loss_acc / 100) + "," + str(EER) + "," +
                               str(EER_thres) + "," + str(EER_FAR) + "," + str(EER_FRR) + "\n")

                loss_acc = 0  # reset accumulated loss

            # decay learning rate
            if (iter + 1) % 5000 == 0:
                lr_factor /= 2  # lr decay
                print("Learning Rate (LR) decayed! Current LR: ", config.lr * lr_factor)

            # save model checkpoint
            if (iter + 1) % 5000 == 0:
                saver.save(sess, os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter // 5000)  # ckpt name suffix
                print("Model checkpoint saved!")
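# The threshold sweep above is self-contained enough to factor out; a sketch
# that mirrors the inline logic (compute_eer is a hypothetical helper, not
# part of the repo):
def compute_eer(S, N, M, thresholds=None):
    # S: (N, M, N) similarity tensor; returns (EER, threshold, FAR, FRR)
    # at the threshold where FAR and FRR are closest
    if thresholds is None:
        thresholds = [0.01 * i + 0.5 for i in range(50)]
    best = (1.0, 0.0, 0.0, 0.0, 0.0)  # (diff, EER, thres, FAR, FRR)
    for thres in thresholds:
        S_thres = S > thres
        FAR = sum(np.sum(S_thres[i]) - np.sum(S_thres[i, :, i])
                  for i in range(N)) / (N - 1) / M / N
        FRR = sum(M - np.sum(S_thres[i][:, i]) for i in range(N)) / M / N
        if abs(FAR - FRR) < best[0]:
            best = (abs(FAR - FRR), (FAR + FRR) / 2, thres, FAR, FRR)
    return best[1:]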