def __init__(self, is_training=True):
    """Build the model's computational graph at construction time.

    A private ``tf.Graph`` is created, the batch returned by ``get_batch``
    is unpacked onto the instance, and the graph-building methods of this
    class are then invoked in a fixed order.

    Args:
        is_training: Forwarded to ``get_batch``. When true, ``summary()``
            is also called and ``init_op`` is created.
    """
    self.is_training = is_training
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Dropout stays at 0.0 unless a value is fed for this placeholder.
        self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        batch, self.num_batch = get_batch(is_training=is_training)
        self.data = batch
        (self.passage_w, self.question_w,
         self.passage_c, self.question_c,
         self.passage_w_len_, self.question_w_len_,
         self.indices) = batch
        self.passage_len = tf.squeeze(self.passage_w_len_)
        self.question_len = tf.squeeze(self.question_w_len_)

        # Graph-building stages, kept in their original call order.
        build_stages = (
            self.encode_ids,
            self.embedding_encoder,
            self.context_to_query,
            self.model_encoder,
            self.output_layer,
            self.loss_function,
        )
        for build_stage in build_stages:
            build_stage()

        if is_training:
            self.summary()
            self.init_op = tf.global_variables_initializer()
        total_params()
def train(logdir1='logdir/default/train1', logdir2='logdir/default/train2', queue=True):
    """Train the second network ("net2") and periodically checkpoint it.

    Args:
        logdir1: Directory passed to ``model.load`` as ``logdir``.
        logdir2: Directory passed to ``model.load`` as ``logdir2``; summaries
            and checkpoints are also written here.
        queue: When true, ``train_op`` is run without a feed dict; otherwise
            each step fetches a batch with ``get_batch`` and feeds it.
    """
    model = Model(mode="train2", batch_size=hp.Train2.batch_size, queue=queue)

    # Loss
    loss_op = model.loss_net2()

    # Training scheme: only variables under the 'net/net2' scope are updated.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=hp.Train2.lr)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net2')
        train_op = optimizer.minimize(loss_op, global_step=global_step, var_list=var_list)

    # Summary
    summ_op = summaries(loss_op)

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
            per_process_gpu_memory_fraction=0.6,
        ),
    )

    # Training
    with tf.Session(config=session_conf) as sess:
        # Initialise everything, then overwrite with the trained weights.
        sess.run(tf.global_variables_initializer())
        model.load(sess, mode='train2', logdir=logdir1, logdir2=logdir2)

        writer = tf.summary.FileWriter(logdir2, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Build the saver once: constructing a new Saver at every checkpoint
        # adds another set of save/restore ops to the graph each time.
        saver = tf.train.Saver()

        try:
            for epoch in range(1, hp.Train2.num_epochs + 1):
                for step in tqdm(range(model.num_batch), total=model.num_batch,
                                 ncols=70, leave=False, unit='b'):
                    if queue:
                        sess.run(train_op)
                    else:
                        mfcc, spec, mel = get_batch(model.mode, model.batch_size)
                        sess.run(train_op, feed_dict={model.x_mfcc: mfcc,
                                                      model.y_spec: spec,
                                                      model.y_mel: mel})

                # Write checkpoint files at every epoch
                summ, gs = sess.run([summ_op, global_step])

                if epoch % hp.Train2.save_per_epoch == 0:
                    saver.save(sess, '{}/epoch_{}_step_{}'.format(logdir2, epoch, gs))

                    # Eval at every n epochs (in a fresh graph)
                    with tf.Graph().as_default():
                        eval2.eval(logdir2, queue=False)

                    # Convert at every n epochs (in a fresh graph)
                    with tf.Graph().as_default():
                        convert.convert(logdir2, queue=False)

                writer.add_summary(summ, global_step=gs)
        finally:
            # Always release the writer and the queue-runner threads, even
            # when a training step raises.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def train():
    """Run one training epoch over the module-level ``train_data``.

    Works entirely on module-level state (``model``, ``optimizer``,
    ``criterion``, ``train_data``, ``bptt``, ``ntokens``, ``device`` and
    ``epoch``) and prints a progress line every ``log_interval`` batches.
    """
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    log_interval = 200  # loop-invariant, so bound once outside the loop
    src_mask = model.generate_square_subsequent_mask(bptt).to(device)

    # Wrapping the range (not the enumerate object) lets tqdm see a length
    # and therefore show a total.
    batch_starts = range(0, train_data.size(0) - 1, bptt)
    for batch, i in enumerate(tqdm(batch_starts)):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()
        if data.size(0) != bptt:
            # Batch length differs from bptt: the mask must be rebuilt to match.
            src_mask = model.generate_square_subsequent_mask(
                data.size(0)).to(device)
        output = model(data, src_mask)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            # Read the LR actually in use from the optimizer: calling
            # scheduler.get_lr() outside scheduler.step() is not supported
            # and can report a value that is not the one being applied.
            current_lr = optimizer.param_groups[0]['lr']
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // bptt, current_lr,
                      elapsed * 1000 / log_interval,
                      cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def __init__(self,is_training = True):
    """Construct the full computational graph when the object is created.

    Args:
        is_training: Forwarded to ``get_batch``. When true the loss,
            summaries and ``init_op`` are added; otherwise ``outputs()``
            is called instead.
    """
    self.is_training = is_training
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        batch, self.num_batch = get_batch(is_training = is_training)
        self.data = batch
        (self.passage_w, self.question_w,
         self.passage_c, self.question_c,
         self.passage_w_len_, self.question_w_len_,
         self.passage_c_len, self.question_c_len,
         self.indices) = batch
        self.passage_w_len = tf.squeeze(self.passage_w_len_)
        self.question_w_len = tf.squeeze(self.question_w_len_)

        self.encode_ids()
        # The initializer is handed over as a callable, not as an instance.
        self.params = get_attn_params(
            Params.attn_size,
            initializer = tf.contrib.layers.xavier_initializer)
        self.attention_match_rnn()
        self.bidirectional_readout()
        self.pointer_network()

        if not is_training:
            self.outputs()
        else:
            self.loss_function()
            self.summary()
            self.init_op = tf.global_variables_initializer()
        total_params()
def __init__(self, is_training=True):
    """Build the encoder/decoder graph for training or evaluation.

    Args:
        is_training: When true, inputs come from ``get_batch`` and the loss,
            optimizer and summaries are added. Otherwise ``x`` and ``y`` are
            int32 placeholders of shape ``(None, hp.max_len)``.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, hp.max_len,))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len,))

        # Character Embedding for x
        self.enc = embed(self.x, len(roma2idx), hp.embed_size, scope="emb_x")

        # Encoder. The mode flag is forwarded (it used to be hard-coded to
        # True), so the encoder and the decoder are built in the same mode
        # when this graph is constructed for evaluation.
        self.memory = encode(self.enc, is_training=is_training)

        # Character Embedding for decoder_inputs
        self.decoder_inputs = shift_by_one(self.y)
        self.dec = embed(self.decoder_inputs, len(surf2idx), hp.embed_size,
                         scope="emb_decoder_inputs")

        # Decoder; called with len(surf2idx) as its third argument.
        self.outputs = decode(self.dec, self.memory, len(surf2idx),
                              is_training=is_training)
        # The small constant keeps the log away from log(0).
        self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
        self.preds = tf.arg_max(self.outputs, dimension=-1)

        if is_training:
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.y, logits=self.outputs)
            # Masking: positions where y == 0 do not contribute to the loss.
            self.istarget = tf.to_float(
                tf.not_equal(self.y, tf.zeros_like(self.y)))
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (
                tf.reduce_sum(self.istarget) + 1e-5)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the encoder / two-stage decoder graph.

    Args:
        is_training: When true, ``x``, ``y``, ``z`` and ``num_batch`` come
            from ``get_batch`` and the losses, optimizer and summaries are
            added. Otherwise ``x`` and ``y`` are placeholders and only the
            forward network is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        # Nothing below is needed for an evaluation graph.
        if not is_training:
            return

        # Element-wise error: L1 when requested, squared difference otherwise.
        if hp.loss_type == "l1":
            def elementwise_error(predicted, target):
                return tf.abs(predicted - target)
        else:
            elementwise_error = tf.squared_difference
        self.loss1 = elementwise_error(self.outputs1, self.y)
        self.loss2 = elementwise_error(self.outputs2, self.z)

        # Target masking: zero out the error wherever the target equals 0.
        if hp.target_zeros_masking:
            self.loss1 = self.loss1 * tf.to_float(tf.not_equal(self.y, 0.))
            self.loss2 = self.loss2 * tf.to_float(tf.not_equal(self.z, 0.))

        self.mean_loss1 = tf.reduce_mean(self.loss1)
        self.mean_loss2 = tf.reduce_mean(self.loss2)
        self.mean_loss = self.mean_loss1 + self.mean_loss2

        # Training Scheme
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(
            self.mean_loss, global_step=self.global_step)

        # Summary
        tf.summary.scalar('mean_loss1', self.mean_loss1)
        tf.summary.scalar('mean_loss2', self.mean_loss2)
        tf.summary.scalar('mean_loss', self.mean_loss)
        self.merged = tf.summary.merge_all()
def evaluate(data, sess, model, epoch=None):
    """Decode ``data`` with Viterbi, dump the predictions and print the score.

    Args:
        data: Iterable of ``(sent, tag)`` pairs; also handed to ``get_batch``.
        sess: Session used to run the logits and transition parameters.
        model: Object exposing the ``sent_input``, ``label`` and
            ``sequence_length`` feed targets.
        epoch: Used as the result-file suffix; ``None`` selects ``'test'``.
    """
    labels_pred = []
    label2tag = {label: tag for tag, label in hp.tag2label.items()}

    for seqs, labels, seqs_len in get_batch(data, hp.batch_size, hp.vocab_path,
                                            hp.tag2label, shuffle=False):
        # NOTE(review): `logits` and `transition_params` are not defined in
        # this function; they are presumably module-level tensors - confirm.
        _logits, _transition_params = sess.run(
            [logits, transition_params],
            feed_dict={
                model.sent_input: seqs,
                model.label: labels,
                model.sequence_length: seqs_len
            })
        for logit, seq_len in zip(_logits, seqs_len):
            # Only the first seq_len rows of each logit matrix are decoded.
            viterbi_seq, _ = tf.contrib.crf.viterbi_decode(
                logit[:seq_len], _transition_params)
            labels_pred.append(viterbi_seq)

    model_pred = []
    epoch_num = str(epoch) if epoch is not None else 'test'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(hp.result_path, exist_ok=True)
    with open(hp.result_path + 'results_epoch_' + epoch_num, 'w',
              encoding='utf-8') as fw:
        for label_pred, (sent, tag) in zip(labels_pred, data):
            fw.write(''.join(sent) + '\n')
            fw.write(''.join(tag) + '\n')
            tag_pred = [label2tag[i] for i in label_pred]
            fw.write(''.join(tag_pred) + '\n')
            if len(label_pred) != len(sent):
                # Report the mismatch; the zip below then pairs up to the
                # shortest sequence instead of raising IndexError.
                print(sent)
                print(len(label_pred))
                print(len(sent))
            sent_res = [[token, gold, predicted]
                        for token, gold, predicted in zip(sent, tag, tag_pred)]
            model_pred.append(sent_res)

    result = conlleval(model_pred)
    print(result)
def evaluate(eval_model, data_source):
    """Return the per-position average loss of ``eval_model`` on ``data_source``.

    Args:
        eval_model: Model to evaluate; it is switched to eval mode and must
            provide ``generate_square_subsequent_mask``.
        data_source: Tensor sliced into ``bptt``-sized batches by ``get_batch``.

    Returns:
        The length-weighted sum of batch losses divided by
        ``len(data_source) - 1``.
    """
    eval_model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    # Build the mask from the model being evaluated, not from the global
    # `model`, so the function depends only on its own argument.
    src_mask = eval_model.generate_square_subsequent_mask(bptt).to(device)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            if data.size(0) != bptt:
                # Batch length differs from bptt: rebuild the mask to match.
                src_mask = eval_model.generate_square_subsequent_mask(
                    data.size(0)).to(device)
            output = eval_model(data, src_mask)
            output_flat = output.view(-1, ntokens)
            # Weight each batch loss by the batch length.
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
def eval(logdir='logdir/default/train1', queue=False):
    """Evaluate net1 on one batch and print its accuracy and loss.

    Args:
        logdir: Directory the trained model is loaded from and the summary
            is written to.
        queue: When true, the ops are run without a feed dict; otherwise a
            batch from ``get_batch`` is fed.
    """
    # Load graph
    model = Model(mode="test1", batch_size=hp.Test1.batch_size, queue=queue)

    # Accuracy
    acc_op = model.acc_net1()

    # Loss
    loss_op = model.loss_net1()

    # Summary
    summ_op = summaries(acc_op, loss_op)

    # The session is configured with zero GPU devices.
    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 0},
    )
    with tf.Session(config=session_conf) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        writer = tf.summary.FileWriter(logdir, sess.graph)
        try:
            # Load trained model
            sess.run(tf.global_variables_initializer())
            model.load(sess, 'train1', logdir=logdir)

            if queue:
                summ, acc, loss = sess.run([summ_op, acc_op, loss_op])
            else:
                mfcc, ppg = get_batch(model.mode, model.batch_size)
                summ, acc, loss = sess.run(
                    [summ_op, acc_op, loss_op],
                    feed_dict={model.x_mfcc: mfcc, model.y_ppgs: ppg})

            writer.add_summary(summ)

            print("acc:", acc)
            print("loss:", loss)
            print('\n')
        finally:
            # Release the writer and the queue threads even if loading or
            # running the ops fails.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def eval(logdir, hparams):
    """Evaluate net1 on one fed batch and print its accuracy and loss.

    Args:
        logdir: Directory the trained model is loaded from and the summary
            is written to.
        hparams: Hyper-parameters forwarded to ``Model``.
    """
    # Load graph
    model = Model(mode="test1", hparams=hparams)

    # Accuracy
    acc_op = model.acc_net1()

    # Loss
    loss_op = model.loss_net1()

    # Summary
    summ_op = summaries(acc_op, loss_op)

    # A CPU-only ConfigProto (device_count={'CPU': 1, 'GPU': 0}) was used
    # here previously; the session now caps the per-process GPU memory instead.
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9
    with tf.Session(config=session_conf) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        writer = tf.summary.FileWriter(logdir, sess.graph)
        try:
            # Load trained model
            sess.run(tf.global_variables_initializer())
            model.load(sess, 'train1', logdir=logdir)

            mfcc, ppg = get_batch(model.mode, model.batch_size)
            summ, acc, loss = sess.run(
                [summ_op, acc_op, loss_op],
                feed_dict={model.x_mfcc: mfcc, model.y_ppgs: ppg})

            writer.add_summary(summ)

            print("acc:", acc)
            print("loss:", loss)
            print('\n')
        finally:
            # Release the writer and the queue threads even if loading or
            # running the ops fails.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def __init__(self, is_training=True):
    """Build the encoder/decoder graph for training or evaluation.

    Args:
        is_training: When true, ``x``, ``y`` and ``num_batch`` come from
            ``get_batch`` and the loss, optimizer and summary are added.
            Otherwise ``x`` and ``y`` are placeholders.
    """
    self.graph = tf.Graph()
    self.is_training = is_training
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len))

        shifted_targets = shift_by_one(self.y)
        self.decoder_inputs = embed(
            shifted_targets, len(char2idx), hp.embed_size)  # (N, T', E)

        with tf.variable_scope('net'):
            # Encoder
            self.memory = encode(
                self.x, is_training=is_training)  # (N, T, hp.n_mels*hp.r)

            # Decoder
            self.outputs = decode(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', E)
            # The small constant keeps the log away from log(0).
            self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
            self.preds = tf.arg_max(self.outputs, dimension=-1)

        # An evaluation graph needs nothing beyond the forward pass.
        if not is_training:
            return

        # Loss
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y, logits=self.outputs)

        # Target masking: positions where y == 0 do not contribute.
        self.istarget = tf.to_float(tf.not_equal(self.y, 0))
        masked_loss_sum = tf.reduce_sum(self.loss * self.istarget)
        self.mean_loss = masked_loss_sum / (
            tf.reduce_sum(self.istarget) + 1e-7)

        # Training Scheme
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(
            self.mean_loss, global_step=self.global_step)

        # Summary
        tf.summary.scalar('mean_loss', self.mean_loss)
        self.merged = tf.summary.merge_all()
def test(hp):
    """Restore a checkpoint, decode the test set and score the output.

    Args:
        hp: Hyper-parameter object; reads ``ckpt``, ``test1``, ``test2``,
            ``vocab``, ``test_batch_size`` and ``testdir``.
    """
    # Loading hyper params
    load_hparams(hp, hp.ckpt)

    logging.info("# Prepare test batches")
    test_batches, num_test_batches, num_test_samples = get_batch(
        hp.test1, hp.test1, 100000, 100000, hp.vocab, hp.test_batch_size,
        shuffle=False)
    # Named `iterator` so the builtin `iter` is not shadowed.
    iterator = tf.data.Iterator.from_structure(test_batches.output_types,
                                               test_batches.output_shapes)
    xs, ys = iterator.get_next()
    test_init_op = iterator.make_initializer(test_batches)

    logging.info("# Load model")
    model = Transformer(hp)

    logging.info("# Session")
    with tf.Session() as sess:
        # Fall back to hp.ckpt itself when no latest checkpoint is found.
        ckpt_ = tf.train.latest_checkpoint(hp.ckpt)
        ckpt = ckpt_ if ckpt_ else hp.ckpt
        saver = tf.train.Saver()
        saver.restore(sess, ckpt)

        # The second return value (mean loss) is not used here.
        y_hat, _ = model.eval(sess, test_init_op, xs, ys, num_test_batches)

        logging.info("# get hypotheses")
        hypotheses = get_hypotheses(num_test_samples, y_hat, model.idx2token)

        logging.info("# write results")
        model_output = os.path.split(ckpt)[-1]
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(hp.testdir, exist_ok=True)
        translation = os.path.join(hp.testdir, model_output)
        with open(translation, 'w', encoding="utf-8") as fout:
            fout.write("\n".join(hypotheses))

        logging.info("# calc bleu score and append it to translation")
        calc_bleu_nltk(hp.test2, translation)
def export_pb_template(class_model):
    """Restore the latest checkpoint and export a frozen ``.pb`` graph.

    Args:
        class_model: Model class; it is instantiated with a ``Context`` and
            must provide ``eval`` and ``get_inference_op_name``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # "-1" hides every CUDA device from this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

    logging.info("# hparams")
    hparams = Hparams()
    parser = hparams.parser
    hp = parser.parse_args()
    load_hparams(hp, hp.logdir)

    context = Context(hp)

    params = {"maxlens": 0x3f3f}
    eval_batches, num_eval_batches, num_eval_samples = get_batch(
        fpath=hp.eval_data,
        task_type=hp.task_type,
        input_indices=context.input_indices,
        vocabs=context.vocabs,
        context=params,
        batch_size=hp.batch_size,
        shuffle=True)

    # create an iterator of the correct shape and type
    iterr = tf.data.Iterator.from_structure(eval_batches.output_types,
                                            eval_batches.output_shapes)
    inputs_and_target = iterr.get_next()

    model = class_model(context)
    # Called for its graph-building side effect; the result is unused.
    _ = model.eval(inputs_and_target[:-1], inputs_and_target[-1])
    inference_name = model.get_inference_op_name()
    logging.info("inference_node_name:%s", inference_name)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(hp.logdir)
        saver.restore(sess, ckpt)
        # Keep the part before the first ":". partition() leaves a name
        # without ":" intact, whereas slicing with find()'s -1 result would
        # silently drop its last character.
        inference_node_name = inference_name.partition(":")[0]
        graph_def = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=[inference_node_name])

        tf.train.write_graph(graph_def, './model', '%s.pb' % hp.pb_name,
                             as_text=False)
    save_operation_specs(os.path.join("./model", '%s.ops' % hp.pb_name))
def eval(model, f, ids2tokens, idx2phr):
    """Score the model on one batch, print a random sample and save weights.

    Args:
        model: Network to evaluate; called on a CUDA batch.
        f: Path prefix for the saved state dict.
        ids2tokens: Function turning a list of ids into tokens.
        idx2phr: Dict from class index to phrase.
    """
    model.eval()

    with torch.no_grad():
        x, y = get_batch(hp.max_span, hp.batch_size, hp.n_classes, False)
        x = x.cuda()

        _, y_hat, _ = model(x)  # y_hat: (N, n_candidates)

        x = x.cpu().numpy().tolist()
        y = y.cpu().numpy().tolist()
        y_hat = y_hat.cpu().numpy().tolist()

        Y = list(y)
        Y_hat = list(y_hat)

    # monitoring: show one randomly chosen sample
    pointer = random.randint(0, len(x) - 1)
    sample_ids = x[pointer]
    sample_gold = y[pointer]
    sample_candidates = y_hat[pointer]

    tokens = ids2tokens(sample_ids)  # this is a function.
    joined = " ".join(tokens).replace(" ##", "")
    ctx = joined.split("[PAD]")[0]  # bert detokenization
    gt = idx2phr[sample_gold]  # this is a dict.
    ht = " | ".join(idx2phr[each] for each in sample_candidates)
    print(f"context: {ctx}")
    print(f"ground truth: {gt}")
    print(f"predictions: {ht}")

    # calc acc.: a sample counts as correct when its label is among the
    # predicted candidates.
    n_samples = len(Y)
    n_correct = sum(
        1 for gold, candidates in zip(Y, Y_hat) if gold in candidates)
    acc = n_correct / n_samples
    print(f"acc@{hp.n_candidates}: %.2f" % acc)

    acc = str(round(acc, 2))
    torch.save(model.state_dict(), f"{f}_ACC{acc}.pt")
def eval(logdir='logdir/default/train1', queue=False):
    """Evaluate net1 on one batch and print its accuracy and loss.

    Args:
        logdir: Directory the trained model is loaded from and the summary
            is written to.
        queue: When true, the ops are run without a feed dict; otherwise a
            batch from ``get_batch`` is fed.
    """
    # Load graph
    model = Model(mode="test1", batch_size=hp.Test1.batch_size, queue=queue)

    # Accuracy
    acc_op = model.acc_net1()

    # Loss
    loss_op = model.loss_net1()

    # Summary
    summ_op = summaries(acc_op, loss_op)

    # The session is configured with zero GPU devices.
    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 0},
    )
    with tf.Session(config=session_conf) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        writer = tf.summary.FileWriter(logdir, sess.graph)
        try:
            # Load trained model
            sess.run(tf.global_variables_initializer())
            model.load(sess, 'train1', logdir=logdir)

            if queue:
                summ, acc, loss = sess.run([summ_op, acc_op, loss_op])
            else:
                mfcc, ppg = get_batch(model.mode, model.batch_size)
                summ, acc, loss = sess.run(
                    [summ_op, acc_op, loss_op],
                    feed_dict={model.x_mfcc: mfcc, model.y_ppgs: ppg})

            writer.add_summary(summ)

            print("acc:", acc)
            print("loss:", loss)
            print('\n')
        finally:
            # Release the writer and the queue threads even if loading or
            # running the ops fails.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def train_and_eval(model, optimizer, criterion, ids2tokens, idx2phr):
    """Train for ``hp.n_train_steps + 1`` steps, evaluating every 500 steps.

    Args:
        model: Network to optimise; called on CUDA batches.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss applied to ``(logits, y)``.
        ids2tokens: Forwarded to ``eval``.
        idx2phr: Forwarded to ``eval``.
    """
    model.train()
    for step in tqdm(range(hp.n_train_steps + 1)):
        x, y = get_batch(hp.max_span, hp.batch_size, hp.n_classes, True)
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        logits, y_hat, _ = model(x)  # logits: (N, classes), y_hat: (N,)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        # Evaluate on every 500th step, never on step 0.
        if step == 0 or step % 500 != 0:
            continue

        eval(model, f'{hp.logdir}/{step}', ids2tokens, idx2phr)
        print(f"step: {step}, loss: {loss.item()}")
        # eval() switches the model to eval mode; restore train mode.
        model.train()
def __init__(self,is_training = True, vocab_size = 100000, demo = False):
    """Construct the full computational graph when the object is created.

    Args:
        is_training: Forwarded to ``get_batch``. When true the loss,
            summaries and ``init_op`` are also created.
        vocab_size: Stored on the instance as ``vocab_size``.
        demo: When true, inputs are set up by ``demo_inputs()`` instead of
            being read from ``get_batch``.
    """
    self.is_training = is_training
    self.vocab_size = vocab_size
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Dropout stays at 0.0 unless a value is fed for this placeholder.
        self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        if not demo:
            batch, self.num_batch = get_batch(is_training = is_training)
            self.data = batch
            (self.passage_w, self.question_w,
             self.passage_c, self.question_c,
             self.indices) = batch
        else:
            self.demo_inputs()

        # Masks are False exactly where the word id equals 1
        # (presumably the padding id - TODO confirm).
        passage_is_one = tf.cast(tf.equal(self.passage_w,1), tf.float32)
        self.passage_mask = tf.cast(1 - passage_is_one, tf.bool)
        question_is_one = tf.cast(tf.equal(self.question_w,1), tf.float32)
        self.question_mask = tf.cast(1 - question_is_one, tf.bool)

        # Lengths are the per-row counts of unmasked positions.
        self.passage_len = tf.reduce_sum(
            tf.cast(self.passage_mask, tf.int32), axis=1)
        self.question_len = tf.reduce_sum(
            tf.cast(self.question_mask, tf.int32), axis=1)

        # Graph-building stages, kept in their original call order.
        build_stages = (
            self.encode_ids,
            self.embedding_encoder,
            self.context_to_query,
            self.model_encoder,
            self.output_layer,
        )
        for build_stage in build_stages:
            build_stage()

        if Params.decay:
            self.apply_ema()

        if is_training:
            self.loss_function()
            self.summary()
            self.init_op = tf.global_variables_initializer()
        total_params()
def test_file(self):
    """Return the mean per-batch accuracy over the configured test file.

    Reads ``self.hp`` (``test_file``, ``maxlen``, ``vocab``, ``batch_size``)
    and uses ``self.sess``, ``self.m`` and ``self.acc_op``.

    Returns:
        The unweighted mean of the per-batch accuracies, or ``0.0`` when
        ``get_batch`` reports no batches.
    """
    test_file = self.hp.test_file
    test_batches, num_test_batches, num_test_samples = get_batch(
        test_file, self.hp.maxlen, self.hp.vocab, self.hp.batch_size)
    # Named `iterator` so the builtin `iter` is not shadowed.
    iterator = tf.data.Iterator.from_structure(test_batches.output_types,
                                               test_batches.output_shapes)
    data_element = iterator.get_next()
    test_init_op = iterator.make_initializer(test_batches)
    self.sess.run(test_init_op)

    if num_test_batches <= 0:
        return 0.0

    total_acc = 0.0
    # Pull a fresh batch on every step. Previously a single batch was fetched
    # once and re-evaluated num_test_batches + 1 times while the sum was
    # divided by num_test_batches, so the result was neither an average over
    # the test set nor correctly normalised.
    # NOTE(review): assumes the dataset yields at least num_test_batches
    # batches - confirm against get_batch.
    for _ in tqdm(range(num_test_batches)):
        x, y, x_len, y_len, labels = self.sess.run(data_element)
        feed_dict = self.m.create_feed_dict(x, y, x_len, y_len, labels)
        total_acc += self.sess.run(self.acc_op, feed_dict=feed_dict)
    return total_acc / num_test_batches
def train(config):
    """Train a ``ConvSeq2Seq`` model, logging and saving periodically.

    Args:
        config: Configuration object; reads ``num_epoch``, ``log_period``,
            ``save_period`` and ``eval_period`` and is passed to the model,
            trainer and graph handler.
    """
    model = ConvSeq2Seq(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config)
    sess = tf.Session()
    try:
        graph_handler.initialize(sess)

        # Stays None when get_batch yields nothing, so the final save below
        # is skipped instead of failing on an unbound name.
        global_step = None
        for batch in tqdm(get_batch(num_epoch=config.num_epoch)):
            global_step = sess.run(model.global_step) + 1
            loss, acc, summary = trainer.run_step(sess, batch)
            # Single-argument call form: valid under Python 2 and Python 3.
            print("global_step: %d, loss: %f, acc: %f" % (global_step, loss, acc))

            if global_step % config.log_period == 0:
                graph_handler.add_summary(summary, global_step)

            if global_step % config.save_period == 0:
                graph_handler.save_model(sess, global_step)

            # TODO: run evaluation every config.eval_period steps (the
            # original branch for this was an empty placeholder).

        # Save the final state unless the last step already saved it.
        if global_step is not None and global_step % config.save_period != 0:
            graph_handler.save_model(sess)
    finally:
        sess.close()
def __init__(self, is_training=True):
    """Build the encoder / two-stage decoder graph with logging summaries.

    Args:
        is_training: When true, the vocabulary is learned and stored,
            inputs come from ``get_batch`` and the losses, optimizer and
            summaries are added. Otherwise ``x`` and ``y`` are placeholders
            and only the forward network is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # Build vocab
            _, idx2char = learn_vocab()
            store_vocab(idx2char)

            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: zero out the error wherever the target is 0.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over the batch axis
            self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1), 0)

            self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1), 0)

            ## images: built from the first example of the batch only
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            # Tag spelled with an underscore like its siblings; the previous
            # 'gotten values2' contained a space, which is not a valid
            # summary-name character.
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()
def __init__(self, num=1, mode="train"):
    '''Build the graph for one of the two networks (Text2World or WSRN).

    Args:
      num: Selects the network that is trained. 1 builds Text2World; any
        other value builds WSRN on the ground-truth ``worlds`` input. Only
        consulted for its training branch when ``mode`` is "train".
      mode: Either "train" or "synthesize". Any value other than "train"
        takes the synthesize path, which builds both networks and feeds
        the Text2World output into WSRN.
    '''
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Set flag
    training = True if mode == "train" else False

    # Graph
    # Data Feeding
    ## L: Text. (B, N), int32
    ## world: World Vocoder concatenate tensor. (B, 8*T/r, num_lf0+num_mgc+num_bap) float32
    ## (shapes above are the original author's notes - not verifiable from this block)
    if mode == "train":
        self.L, self.worlds, self.worlds_WSR, self.fnames, self.num_batch = get_batch(
        )
        self.prev_max_attentions = tf.ones(shape=(hp.B, ), dtype=tf.int32)
        self.gts = tf.convert_to_tensor(guided_attention())
    else:  # Synthesize
        self.L = tf.placeholder(tf.int32, shape=(None, None))
        self.worlds = tf.placeholder(
            tf.float32,
            shape=(None, None,
                   hp.num_bap + hp.num_lf0 + hp.num_mgc + hp.num_vuv))
        self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, ))
        self.gts = tf.convert_to_tensor(guided_attention())

    if num == 1 or (not training):
        with tf.variable_scope("Text2World"):
            # Get S or decoder inputs: `worlds` shifted right by one step
            # along axis 1, with a zero frame prepended.
            self.S = tf.concat((tf.zeros_like(
                self.worlds[:, :1, :]), self.worlds[:, :-1, :]), 1)

            # Networks
            with tf.variable_scope("TextEnc"):
                self.K, self.V = TextEnc(self.L,
                                         training=training)  # (N, Tx, e)

            with tf.variable_scope("AudioEnc"):
                self.Q = AudioEnc(self.S, training=training)

            with tf.variable_scope("Attention"):
                # R: (B, T/r, 2d)
                # alignments: (B, N, T/r)
                # max_attentions: (B,)
                # The `mononotic_attention` flag is enabled only outside
                # training.
                self.R, self.alignments, self.max_attentions = Attention(
                    self.Q,
                    self.K,
                    self.V,
                    mononotic_attention=(not training),
                    prev_max_attentions=self.prev_max_attentions)
            with tf.variable_scope("AudioDec"):
                self.Y_logits, self.Y = AudioDec(
                    self.R, training=training
                )  # (B, T/r, num_lf0+num_mgc+num_bap)
    else:  # num==2 & training. During training the ground-truth `worlds`
        # values are fed to WSRN.
        with tf.variable_scope("WSRN"):
            self.Z_logits, self.Z = WSRN(self.worlds, training=training)

    if not training:
        # During inference, the predicted world values (self.Y) are fed.
        with tf.variable_scope("WSRN"):
            self.Z_logits, self.Z = WSRN(self.Y, training=training)

    with tf.variable_scope("gs"):
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)

    if training:
        if num == 1:  # Text2World
            # world loss: mean squared error between targets and predictions
            self.loss_worlds = tf.losses.mean_squared_error(
                self.worlds, self.Y)
            # Disabled alternative (L1):
            #self.loss_worlds = tf.reduce_mean(tf.abs(self.Y - self.worlds))

            # world binary divergence loss (disabled)
            #self.loss_bd1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Y_logits, labels=self.worlds))

            # guided_attention loss: alignments are padded with -1 and
            # cropped to (hp.max_N, hp.max_T); padded cells are then
            # excluded through attention_masks.
            self.A = tf.pad(self.alignments, [(0, 0), (0, hp.max_N),
                                              (0, hp.max_T)],
                            mode="CONSTANT",
                            constant_values=-1.)[:, :hp.max_N, :hp.max_T]
            self.attention_masks = tf.to_float(tf.not_equal(self.A, -1))
            self.loss_att = tf.reduce_sum(
                tf.abs(self.A * self.gts) * self.attention_masks)
            self.mask_sum = tf.reduce_sum(self.attention_masks)
            self.loss_att /= self.mask_sum

            # total loss
            self.loss = self.loss_worlds + self.loss_att  #self.loss_bd1 +

            tf.summary.scalar('train/loss_worlds', self.loss_worlds)
            #tf.summary.scalar('train/loss_bd1', self.loss_bd1)
            tf.summary.scalar('train/loss_att', self.loss_att)
            tf.summary.image(
                'train/world_gt',
                tf.expand_dims(tf.transpose(self.worlds[:1], [0, 2, 1]),
                               -1))
            tf.summary.image(
                'train/world_hat',
                tf.expand_dims(tf.transpose(self.Y[:1], [0, 2, 1]), -1))
        else:  # WSRN
            # world loss: mean squared error between predictions and targets
            self.loss_WSR = tf.losses.mean_squared_error(
                self.Z, self.worlds_WSR)

            # world binary divergence loss (disabled)
            #self.loss_bd2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Z_logits, labels=self.worlds_WSR))

            # total loss
            self.loss = self.loss_WSR  #+ self.loss_bd2

            tf.summary.scalar('train/loss_world_SSRN', self.loss_WSR)
            #tf.summary.scalar('train/loss_bd2', self.loss_bd2)

        # Training Scheme
        self.lr = learning_rate_decay(hp.lr, self.global_step)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        tf.summary.scalar("lr", self.lr)

        ## gradient clipping: every gradient is clipped element-wise
        ## to the range [-1, 1]
        self.gvs = self.optimizer.compute_gradients(self.loss)
        self.clipped = []
        for grad, var in self.gvs:
            grad = tf.clip_by_value(grad, -1., 1.)
            self.clipped.append((grad, var))
        self.train_op = self.optimizer.apply_gradients(
            self.clipped, global_step=self.global_step)

        # Summary
        self.merged = tf.summary.merge_all()
from tqdm import tqdm
import os
import logging
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu

logging.basicConfig(level=logging.INFO)

# Parse the hyper-parameters and store them under hp.logdir.
logging.info("# hparams")
hparams = Hparams()
hp = hparams.parse_arg()
save_hparams(hp, hp.logdir)

# Training batches are shuffled, evaluation batches are not.
logging.info("# Prepare train/eval batches")
train_batches, train_num_batches, train_samples = get_batch(hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab, shuffle = True)
eval_batches, eval_num_batches, eval_samples = get_batch(hp.eval1, hp.eval2, hp.maxlen1, hp.maxlen2, hp.vocab, shuffle = False)

# Create an iterator of the correct shape and type. One iterator is shared:
# running train_init_op or eval_init_op points it at the matching dataset.
# NOTE(review): `iter` shadows the builtin; not renamed here because code
# outside this view may refer to it.
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()
train_init_op = iter.make_initializer(train_batches)
eval_init_op = iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
loss, train_op, global_step, train_summaries = m.train(xs, ys)
import logging

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
logging.basicConfig(level=logging.INFO)

# Parse the hyper-parameters and store them under hp.logdir.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)

# Training batches are shuffled and use hp.maxlen1 / hp.maxlen2; evaluation
# batches are not shuffled and pass fixed values of 1000 in those positions.
logging.info("# Prepare train/eval batches")
train_batches, num_train_batches, num_train_samples = get_batch(
    hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab,
    hp.train_paraphrased, hp.batch_size, shuffle=True,
    paraphrase_type=hp.paraphrase_type)
eval_batches, num_eval_batches, num_eval_samples = get_batch(
    hp.eval1, hp.eval2, 1000, 1000, hp.vocab,
    hp.eval_paraphrased, hp.batch_size, shuffle=False,
    paraphrase_type=hp.paraphrase_type)
def __init__(self, mode="train"):
    """Build the graph for training, evaluation or synthesis.

    Args:
        mode: "train" reads inputs from ``get_batch``; "eval" uses
            placeholders for ``x``, ``y``, ``z`` and ``fnames``; any other
            value is treated as synthesis and uses placeholders for ``x``
            and ``y`` only. Losses, optimizer and summaries are added for
            "train" and "eval".
    """
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Set phase
    is_training = (mode == "train")

    # Graph
    # Data Feeding
    # x: Text. (N, Tx)
    # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
    # z: Magnitude. (N, Ty, n_fft//2+1)
    if mode == "train":
        self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
    elif mode == "eval":
        self.x = tf.placeholder(tf.int32, shape=(None, None))
        self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))
        self.z = tf.placeholder(tf.float32, shape=(None, None, 1+hp.n_fft//2))
        self.fnames = tf.placeholder(tf.string, shape=(None,))
    else:  # Synthesize
        self.x = tf.placeholder(tf.int32, shape=(None, None))
        self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels * hp.r))

    # Get encoder/decoder inputs
    self.encoder_inputs = embed(self.x, len(hp.vocab), hp.embed_size)  # (N, T_x, E)
    # Shift y right by one step along axis 1, prepending a zero frame.
    first_frame_zeros = tf.zeros_like(self.y[:, :1, :])
    shifted_targets = tf.concat((first_frame_zeros, self.y[:, :-1, :]), 1)  # (N, Ty/r, n_mels*r)
    # Feed only the last hp.n_mels channels of each step. (N, Ty/r, n_mels)
    self.decoder_inputs = shifted_targets[:, :, -hp.n_mels:]

    # Networks
    with tf.variable_scope("net"):
        # Encoder
        self.memory = encoder(self.encoder_inputs, is_training=is_training)  # (N, T_x, E)

        # Decoder1
        self.y_hat, self.alignments = decoder1(
            self.decoder_inputs, self.memory,
            is_training=is_training)  # (N, T_y//r, n_mels*r)

        # Decoder2 or postprocessing
        self.z_hat = decoder2(self.y_hat, is_training=is_training)  # (N, T_y//r, (1+n_fft//2)*r)

    # monitor: waveform computed from the first item of z_hat
    self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

    # Synthesis needs nothing beyond the forward pass.
    if mode not in ("train", "eval"):
        return

    # Loss: mean absolute error of both decoder outputs
    self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y))
    self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z))
    self.loss = self.loss1 + self.loss2

    # Training Scheme
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

    ## gradient clipping: each gradient is clipped to norm 5
    self.gvs = self.optimizer.compute_gradients(self.loss)
    self.clipped = [(tf.clip_by_norm(grad, 5.), var)
                    for grad, var in self.gvs]
    self.train_op = self.optimizer.apply_gradients(
        self.clipped, global_step=self.global_step)

    # Summary
    tf.summary.scalar('{}/loss1'.format(mode), self.loss1)
    tf.summary.scalar('{}/loss'.format(mode), self.loss)
    tf.summary.scalar('{}/lr'.format(mode), self.lr)

    tf.summary.image("{}/mel_gt".format(mode),
                     tf.expand_dims(self.y, -1), max_outputs=1)
    tf.summary.image("{}/mel_hat".format(mode),
                     tf.expand_dims(self.y_hat, -1), max_outputs=1)
    tf.summary.image("{}/mag_gt".format(mode),
                     tf.expand_dims(self.z, -1), max_outputs=1)
    tf.summary.image("{}/mag_hat".format(mode),
                     tf.expand_dims(self.z_hat, -1), max_outputs=1)

    tf.summary.audio("{}/sample".format(mode),
                     tf.expand_dims(self.audio, 0), hp.sr)
    self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the model graph.

    When ``is_training`` is true, the batch from ``get_batch()`` is split
    into ``hp.num_gpus`` parts, one tower is built per part under the shared
    ``net`` variable scope, and the tower losses and gradients are averaged.
    Otherwise a single tower is built on placeholders.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Make sure that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(self.decoder_inputs, hp.num_gpus, 0)

            # Sequence lengths for masking
            # NOTE(review): self.x is already a list of per-GPU splits at
            # this point, so these are computed over that list rather than
            # over the original batch tensor -- confirm this is intended.
            self.x_lengths = tf.to_int32(tf.reduce_sum(tf.sign(tf.abs(self.x)), -1))
            self.x_masks = tf.to_float(tf.expand_dims(tf.sign(tf.abs(self.x)), -1))

            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # Variables are created by tower 0 and reused by the others.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(self.x[i], is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(self.decoder_inputs[i],
                                                    self.memory,
                                                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(self.outputs1, is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(self.outputs2, self.z[i])

                            # Target masking: zero the loss where the target is exactly 0.
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(tf.not_equal(self.z[i], 0.))

                            self.loss1 = tf.reduce_mean(self.loss1)
                            self.loss2 = tf.reduce_mean(self.loss2)
                            self.loss = self.loss1 + self.loss2

                            self.losses.append(self.loss)
                            self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
                            self.grads_and_vars_list.append(self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.mean_loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for grads_and_vars in zip(*self.grads_and_vars_list):
                    # Same choice as the original loop: the variable of the last tower.
                    var = grads_and_vars[-1][1]
                    grads = [tf.expand_dims(grad, 0)
                             for grad, _ in grads_and_vars
                             if grad is not None]
                    if not grads:
                        # No tower produced a gradient for this variable;
                        # tf.concat on an empty list would raise.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    self.mean_grads_and_vars.append((mean_grad, var))

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.train_op = self.optimizer.apply_gradients(self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)

            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))
            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(self.decoder_inputs, self.memory, is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(self.outputs1, is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
def add_data(self, reuse=None):
    '''
    Add either variables (for training) or placeholders (for synthesis) to the graph.

    In 'train' mode the inputs come from `get_batch`; in 'synthesize' and
    'generate_attention' modes placeholders are created instead. Any other
    mode leaves the object untouched. `reuse` is accepted but not used here.
    '''
    # Data Feeding
    ## L: Text. (B, N), int32
    ## mels: Reduced melspectrogram. (B, T/r, n_mels) float32
    ## mags: Magnitude. (B, T, n_fft//2+1) float32
    hp = self.hp

    # Modes are compared with ==: identity (`is`) on string literals only
    # works by accident of interning.
    if self.mode == 'train':
        batchdict = get_batch(hp, self.get_batchsize())

        self.L = batchdict['text']
        self.mels = batchdict['mel']
        self.mags = batchdict['mag']
        self.fnames = batchdict['fname']
        self.num_batch = batchdict['num_batch']

        if hp.multispeaker:
            ## check multispeaker config is valid:- TODO: to config validation?
            for position in hp.multispeaker:
                assert position in ['text_encoder_input', 'text_encoder_towards_end', \
                            'audio_decoder_input', 'ssrn_input', 'audio_encoder_input',\
                            'learn_channel_contributions', 'speaker_dependent_phones']
            self.speakers = batchdict['speaker']
        else:
            self.speakers = None

        if hp.attention_guide_dir:
            self.gts = batchdict['attention_guide']
        else:
            self.gts = tf.convert_to_tensor(get_global_attention_guide(hp))

        if hp.use_external_durations:
            self.durations = batchdict['duration']

        if hp.merlin_label_dir:
            self.merlin_label = batchdict['merlin_label']

        if 'position_in_phone' in hp.history_type:
            self.position_in_phone = batchdict['position_in_phone']

        batchsize = self.get_batchsize()
        self.prev_max_attentions = tf.ones(shape=(batchsize, ), dtype=tf.int32)

    elif self.mode in ('synthesize', 'generate_attention'):
        # Both modes share the same placeholders, created in the same order;
        # only 'synthesize' additionally feeds prev_max_attentions.
        self.L = tf.placeholder(tf.int32, shape=(None, None))
        self.speakers = None
        if hp.multispeaker:
            self.speakers = tf.placeholder(tf.int32, shape=(None, None))
        if hp.use_external_durations:
            self.durations = tf.placeholder(tf.float32, shape=(None, None, None))
        if hp.merlin_label_dir:
            self.merlin_label = tf.placeholder(tf.float32, shape=(None, None, hp.merlin_lab_dim))
        if 'position_in_phone' in hp.history_type:
            self.position_in_phone = tf.placeholder(tf.float32, shape=(None, None, 1))
        self.mels = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels))
        if self.mode == 'synthesize':
            self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, ))
def train(logdir='logdir/default/train1', queue=True):
    """Train net1, writing summaries and periodic checkpoints to `logdir`.

    Args:
        logdir: Directory the model is loaded from and where summaries and
            checkpoints are written.
        queue: If true, the train op is run without a feed dict; otherwise
            each step fetches a batch with `get_batch` and feeds it.
    """
    model = Model(mode="train1", batch_size=hp.Train1.batch_size, queue=queue)

    # Loss
    loss_op = model.loss_net1()

    # Accuracy
    acc_op = model.acc_net1()

    # Training Scheme
    global_step = tf.Variable(0, name='global_step', trainable=False)

    optimizer = tf.train.AdamOptimizer(learning_rate=hp.Train1.lr)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net1')
        train_op = optimizer.minimize(loss_op, global_step=global_step, var_list=var_list)

    # Summary
    for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net1'):
        tf.summary.histogram(v.name, v)
    tf.summary.scalar('net1/train/loss', loss_op)
    tf.summary.scalar('net1/train/acc', acc_op)
    summ_op = tf.summary.merge_all()

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
        ),
    )

    # Training
    with tf.Session(config=session_conf) as sess:
        # Load trained model
        sess.run(tf.global_variables_initializer())
        model.load(sess, 'train1', logdir=logdir)

        writer = tf.summary.FileWriter(logdir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Most recent feed dict; stays None in queue mode, where nothing is
        # fed. The summary run below reuses it, so it no longer refers to
        # batch variables that do not exist when queue=True.
        feed_dict = None
        try:
            for epoch in range(1, hp.Train1.num_epochs + 1):
                for _ in tqdm(range(model.num_batch), total=model.num_batch, ncols=70, leave=False, unit='b'):
                    if not queue:
                        mfcc, ppg = get_batch(model.mode, model.batch_size)
                        feed_dict = {model.x_mfcc: mfcc, model.y_ppgs: ppg}
                    sess.run(train_op, feed_dict=feed_dict)

                # Write checkpoint files at every epoch.
                # In some environments the placeholders must be fed for these
                # ops, so the last batch is fed again when one exists.
                summ, gs = sess.run([summ_op, global_step], feed_dict=feed_dict)

                if epoch % hp.Train1.save_per_epoch == 0:
                    tf.train.Saver().save(sess, '{}/epoch_{}_step_{}'.format(logdir, epoch, gs))

                    # Write eval accuracy, in a separate graph
                    with tf.Graph().as_default():
                        eval1.eval(logdir=logdir, queue=False)

                writer.add_summary(summ, global_step=gs)
        finally:
            # Release the writer and the queue-runner threads even if a step raises.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def __init__(self, is_training=True):
    """Build the graph: embedding, prenet, CBHG encoder and dense readout.

    When ``is_training`` is true the inputs come from ``get_batch()`` and the
    masked loss, accuracy, optimizer and summaries are added; otherwise the
    inputs are placeholders of width ``hp.maxlen``.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))

        # Load vocabulary
        pnyn2idx, _, hanzi2idx, _ = load_vocab()

        # Character Embedding for x
        embedded = embed(self.x, len(pnyn2idx), hp.embed_size, scope="emb_x")

        # Encoder pre-net
        prenet_out = prenet(
            embedded,
            num_units=[hp.embed_size, hp.embed_size // 2],
            is_training=is_training,
        )  # (N, T, E/2)

        # Encoder CBHG
        ## Conv1D bank
        banks = conv1d_banks(
            prenet_out,
            K=hp.encoder_num_banks,
            num_units=hp.embed_size // 2,
            is_training=is_training,
        )  # (N, T, K * E / 2)

        ## Max pooling
        pooled = tf.layers.max_pooling1d(banks, 2, 1, padding="same")  # (N, T, K * E / 2)

        ## Conv1D projections
        proj = conv1d(pooled, hp.embed_size // 2, 5, scope="conv1d_1")  # (N, T, E/2)
        proj = normalize(proj, type=hp.norm_type, is_training=is_training,
                         activation_fn=tf.nn.relu, scope="norm1")
        proj = conv1d(proj, hp.embed_size // 2, 5, scope="conv1d_2")  # (N, T, E/2)
        proj = normalize(proj, type=hp.norm_type, is_training=is_training,
                         activation_fn=None, scope="norm2")
        hidden = proj + prenet_out  # (N, T, E/2) # residual connections

        ## Highway Nets
        for block in range(hp.num_highwaynet_blocks):
            hidden = highwaynet(
                hidden,
                num_units=hp.embed_size // 2,
                scope='highwaynet_{}'.format(block),
            )  # (N, T, E/2)

        ## Bidirectional GRU
        hidden = gru(hidden, hp.embed_size // 2, True, scope="gru1")  # (N, T, E)

        ## Readout
        self.outputs = tf.layers.dense(hidden, len(hanzi2idx), use_bias=False)
        self.preds = tf.to_int32(tf.arg_max(self.outputs, dimension=-1))

        if is_training:
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.outputs)
            # masking: positions whose label is 0 do not count
            self.istarget = tf.to_float(tf.not_equal(self.y, tf.zeros_like(self.y)))
            self.hits = tf.to_float(tf.equal(self.preds, self.y)) * self.istarget
            self.acc = tf.reduce_sum(self.hits) / tf.reduce_sum(self.istarget)
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / tf.reduce_sum(self.istarget)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            tf.summary.scalar('acc', self.acc)
            self.merged = tf.summary.merge_all()
def __init__(self, training=True):
    """Build the encoder / decoder / converter graph.

    When ``training`` is true the inputs come from ``get_batch()`` and the
    losses, clipped-gradient train op and summaries are added; otherwise
    fixed-shape placeholders are created for inference.
    """
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Graph
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Data Feeding
        ## x: Text. (N, Tx), int32
        ## y1: Reduced melspectrogram. (N, Ty//r, n_mels*r) float32
        ## y2: Reduced dones. (N, Ty//r,) int32
        ## z: Magnitude. (N, Ty, n_fft//2+1) float32
        if training:
            self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
            self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size), dtype=tf.int32)
        else:  # Inference
            self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
            self.y1 = tf.placeholder(tf.float32, shape=(hp.batch_size, hp.Ty//hp.r, hp.n_mels*hp.r))
            self.prev_max_attentions_li = tf.placeholder(tf.int32, shape=(hp.dec_layers, hp.batch_size,))

        # Get decoder inputs: feed last frames only (N, Ty//r, n_mels)
        self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

        # Networks
        with tf.variable_scope("encoder"):
            self.keys, self.vals = encoder(self.x, training=training)  # (N, Tx, e)

        with tf.variable_scope("decoder"):
            # mel_logits: (N, Ty/r, n_mels*r)
            # done_output: (N, Ty/r, 2),
            # decoder_output: (N, Ty/r, e)
            # alignments_li: dec_layers*(Tx, Ty/r)
            # max_attentions_li: dec_layers*(N, T_y/r)
            self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li \
                = decoder(self.decoder_input,
                          self.keys,
                          self.vals,
                          self.prev_max_attentions_li,
                          training=training)
            self.mel_output = tf.nn.sigmoid(self.mel_logits)

        with tf.variable_scope("converter"):
            # Restore shape
            self.converter_input = tf.reshape(self.decoder_output, (-1, hp.Ty, hp.embed_size//hp.r))
            self.converter_input = fc_block(self.converter_input,
                                            hp.converter_channels,
                                            activation_fn=tf.nn.relu,
                                            training=training)  # (N, Ty, v)

            # Converter
            self.mag_logits = converter(self.converter_input, training=training)  # (N, Ty, 1+n_fft//2)
            self.mag_output = tf.nn.sigmoid(self.mag_logits)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        if training:
            # Loss
            self.loss_mels = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
            self.loss_dones = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output, labels=self.y2))
            self.loss_mags = tf.reduce_mean(tf.abs(self.mag_output - self.z))
            self.loss = self.loss_mels + self.loss_dones + self.loss_mags

            # Training Scheme
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            ## gradient clipping
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                # compute_gradients may pair a variable with None; the clip
                # ops cannot take None, so such pairs are passed through.
                if grad is not None:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                self.clipped.append((grad, var))
            self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)

            # Summary
            tf.summary.scalar('Train_Loss/LOSS', self.loss)
            tf.summary.scalar('Train_Loss/mels', self.loss_mels)
            tf.summary.scalar('Train_Loss/dones', self.loss_dones)
            tf.summary.scalar('Train_Loss/mags', self.loss_mags)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the model graph.

    When ``is_training`` is true, the batch from ``get_batch()`` is split
    into ``hp.num_gpus`` parts, one tower is built per part under the shared
    ``net`` variable scope, and the tower losses and gradients are averaged.
    Otherwise a single tower is built on placeholders, under the same
    ``net`` scope so that variable names match the training graph.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Note that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(self.decoder_inputs, hp.num_gpus, 0)

            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # Variables are created by tower 0 and reused by the others.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(self.x[i], is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(self.decoder_inputs[i], self.memory)  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(self.outputs1, is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(self.outputs2, self.z[i])

                            # Target masking: zero the loss where the target is exactly 0.
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(tf.not_equal(self.z[i], 0.))

                            self.mean_loss1 = tf.reduce_mean(self.loss1)
                            self.mean_loss2 = tf.reduce_mean(self.loss2)
                            self.mean_loss = self.mean_loss1 + self.mean_loss2

                            self.losses.append(self.mean_loss)
                            self.grads_and_vars = self.optimizer.compute_gradients(self.mean_loss)
                            self.grads_and_vars_list.append(self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for grads_and_vars in zip(*self.grads_and_vars_list):
                    # Same choice as the original loop: the variable of the last tower.
                    var = grads_and_vars[-1][1]
                    # compute_gradients may pair a variable with None, which
                    # tf.expand_dims cannot take; average only real gradients.
                    grads = [tf.expand_dims(grad, 0)
                             for grad, _ in grads_and_vars
                             if grad is not None]
                    if not grads:
                        # Nothing to average for this variable.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    self.mean_grads_and_vars.append((mean_grad, var))

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.train_op = self.optimizer.apply_gradients(self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('loss', self.loss)

            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.decoder_inputs = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels * hp.r))

            # Same scope as the training towers, so the variables created
            # here carry the same 'net/...' names as the trained ones.
            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(self.decoder_inputs, self.memory)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(self.outputs1, is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
from hparams import Hparams
from model import Transformer
from utils import save_hparams, save_variable_specs, get_hypotheses

logging.basicConfig(level=logging.INFO)

# Parse the hyperparameters from the command line and store them in the log directory.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)

# Build the training (shuffled) and evaluation (unshuffled) batches.
logging.info("# Prepare train/eval batches")
train_batches, num_train_batches, num_train_samples = get_batch(
    hp.train,
    hp.maxlen,
    hp.maxlen,
    hp.vocab,
    hp.batch_size,
    shuffle=True,
)
eval_batches, num_eval_batches, num_eval_samples = get_batch(
    hp.eval,
    hp.maxlen,
    hp.maxlen,
    hp.vocab,
    hp.batch_size,
    shuffle=False,
)

# create a iterator of the correct shape and type
# NOTE(review): `iter` shadows the builtin; the name is kept because code
# outside this section may refer to it.
iter = tf.data.Iterator.from_structure(
    train_batches.output_types,
    train_batches.output_shapes,
)
xs, ys = iter.get_next()

logging.info('# init data')
def __init__(self, config=None):
    """Load the vocabulary and fetch the first output of the 'Encoder' batch.

    Args:
        config: Passed through unchanged to `get_batch`.
    """
    vocab = load_vocab()
    self.char2idx, self.idx2char = vocab

    self.graph = tf.Graph()
    with self.graph.as_default():
        # get_batch returns six values here; only the first one is kept.
        batch = get_batch(config, 'Encoder')
        self.origx, _, _, _, _, _ = batch
import logging from tqdm import tqdm logging.basicConfig(level=logging.INFO) logging.info("# hparams") hparams = Hparams() parser = hparams.parser hp = parser.parse_args() load_hparams(hp, hp.modeldir) logging.info("# Prepare test batches") test_batches, num_test_batches, num_test_samples = get_batch( hp.test_source, hp.test_target, 100000, 100000, hp.vocab, hp.test_batch_size, shuffle=False) iter = tf.data.Iterator.from_structure(test_batches.output_types, test_batches.output_shapes) xs, ys = iter.get_next() test_init_op = iter.make_initializer(test_batches) logging.info("# Load model") m = Transformer(hp) y_hat, _, refs = m.eval(xs, ys) logging.info("# Session") with tf.Session() as sess:
def __init__(self, config=None, training=True, train_form='Both'):
    """Build the graph for the requested part of the model.

    Args:
        config: Passed through to `get_batch` when training.
        training: If true, read inputs from `get_batch` and add losses, the
            train op and summaries; otherwise create placeholders.
        train_form: 'Both' builds encoder, decoder and converter; 'Encoder'
            builds encoder and decoder only; 'Converter' builds the
            converter only, fed directly with `y1`.
    """
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Whether the encoder/decoder half of the model is part of this graph.
    with_seq2seq = train_form != 'Converter'

    self.graph = tf.Graph()
    with self.graph.as_default():
        if training:
            (self.origx, self.x, self.y1, self.y2, self.y3,
             self.num_batch) = get_batch(config, train_form)
            # NOTE(review): num_batch is used as the second dimension here;
            # confirm it is the batch size and not the number of batches.
            self.prev_max_attentions_li = tf.ones(
                shape=(hp.dec_layers, self.num_batch), dtype=tf.int32)
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(1, hp.T_x))
            self.y1 = tf.placeholder(
                tf.float32, shape=(1, hp.T_y // hp.r, hp.n_mels * hp.r))
            self.prev_max_attentions_li = tf.placeholder(
                tf.int32, shape=(hp.dec_layers, 1,))

        if with_seq2seq:
            # Get decoder inputs: feed last frames only
            first_frame = tf.zeros_like(self.y1[:, :1, -hp.n_mels:])
            shifted_frames = self.y1[:, :-1, -hp.n_mels:]
            self.decoder_input = tf.concat((first_frame, shifted_frames), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.encoded = encoder(self.x, training=training)

            with tf.variable_scope("decoder"):
                decoder_outputs = decoder(self.decoder_input,
                                          self.encoded,
                                          self.prev_max_attentions_li,
                                          training=training)
                self.mel_logits, self.done_output, self.max_attentions_li = decoder_outputs
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

        if train_form in ('Both', 'Converter'):
            with tf.variable_scope("converter"):
                # 'Both' chains the converter after the decoder output;
                # 'Converter' feeds it the mel input directly.
                if train_form == 'Both':
                    self.converter_input = self.mel_output
                else:
                    self.converter_input = self.y1
                self.mag_logits = converter(self.converter_input, training=training)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        if training:
            # Loss
            if with_seq2seq:
                self.loss1 = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
                if hp.include_dones:
                    self.loss2 = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=self.done_output, labels=self.y2))
            if train_form != 'Encoder':
                self.loss3 = tf.reduce_mean(tf.abs(self.mag_output - self.y3))

            if train_form == 'Both':
                self.loss = (self.loss1 + self.loss2 + self.loss3
                             if hp.include_dones
                             else self.loss1 + self.loss3)
            elif train_form == 'Encoder':
                self.loss = (self.loss1 + self.loss2
                             if hp.include_dones
                             else self.loss1)
            else:
                self.loss = self.loss3

            # Training Scheme
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            ## gradient clipping
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                # None gradients are passed through unclipped.
                if grad is not None:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                self.clipped.append((grad, var))
            self.train_op = self.optimizer.apply_gradients(
                self.clipped, global_step=self.global_step)

            # Summary
            tf.summary.scalar('loss', self.loss)

            if with_seq2seq:
                tf.summary.histogram('mel_output', self.mel_output)
                tf.summary.histogram('mel_actual', self.y1)
                tf.summary.scalar('loss1', self.loss1)
                if hp.include_dones:
                    tf.summary.histogram('done_output', self.done_output)
                    tf.summary.histogram('done_actual', self.y2)
                    tf.summary.scalar('loss2', self.loss2)

            if train_form != 'Encoder':
                tf.summary.histogram('mag_output', self.mag_output)
                tf.summary.histogram('mag_actual', self.y3)
                tf.summary.scalar('loss3', self.loss3)

            self.merged = tf.summary.merge_all()
def __init__(self, mode="train"):
    """Build the graph: inputs, networks and (for train/eval) the training ops.

    Args:
        mode: "train" reads tensors from `get_batch()`; "eval" creates
            placeholders for text, mel, magnitude and file names; any other
            value (synthesis) creates placeholders for text and mel only.
    """
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Set phase
    is_training = mode == "train"

    # Data Feeding
    # x: Text. (N, Tx)
    # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
    # z: Magnitude. (N, Ty, n_fft//2+1)
    if mode == "train":
        self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
    elif mode == "eval":
        self.x = tf.placeholder(tf.int32, shape=(None, None))
        self.y = tf.placeholder(tf.float32,
                                shape=(None, None, hp.n_mels * hp.r))
        self.z = tf.placeholder(tf.float32,
                                shape=(None, None, 1 + hp.n_fft // 2))
        self.fnames = tf.placeholder(tf.string, shape=(None, ))
    else:  # Synthesize
        self.x = tf.placeholder(tf.int32, shape=(None, None))
        self.y = tf.placeholder(tf.float32,
                                shape=(None, None, hp.n_mels * hp.r))

    # Get encoder/decoder inputs
    self.encoder_inputs = embed(self.x, len(hp.vocab),
                                hp.embed_size)  # (N, T_x, E)
    # Shift the targets one step to the right, starting from an all-zero frame.
    self.decoder_inputs = tf.concat(
        (tf.zeros_like(self.y[:, :1, :]), self.y[:, :-1, :]),
        1)  # (N, Ty/r, n_mels*r)
    # feed last frames only (N, Ty/r, n_mels)
    self.decoder_inputs = self.decoder_inputs[:, :, -hp.n_mels:]

    # Networks
    with tf.variable_scope("net"):
        # Encoder
        self.memory = encoder(self.encoder_inputs,
                              is_training=is_training)  # (N, T_x, E)

        # Decoder1
        self.y_hat, self.alignments = decoder1(
            self.decoder_inputs, self.memory,
            is_training=is_training)  # (N, T_y//r, n_mels*r)

        # Decoder2 or postprocessing
        self.z_hat = decoder2(
            self.y_hat,
            is_training=is_training)  # (N, T_y//r, (1+n_fft//2)*r)

    # monitor: waveform for the first item of the batch
    self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

    if mode in ("train", "eval"):
        # Loss
        self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y))
        self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z))
        self.loss = self.loss1 + self.loss2

        # Training Scheme
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)
        self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

        ## gradient clipping
        self.gvs = self.optimizer.compute_gradients(self.loss)
        self.clipped = []
        for grad, var in self.gvs:
            # compute_gradients may pair a variable with None; clip_by_norm
            # cannot take None, so pass such pairs through untouched.
            if grad is not None:
                grad = tf.clip_by_norm(grad, 5.)
            self.clipped.append((grad, var))
        self.train_op = self.optimizer.apply_gradients(
            self.clipped, global_step=self.global_step)

        # Summary
        tf.summary.scalar('{}/loss1'.format(mode), self.loss1)
        tf.summary.scalar('{}/loss'.format(mode), self.loss)
        tf.summary.scalar('{}/lr'.format(mode), self.lr)

        tf.summary.image("{}/mel_gt".format(mode),
                         tf.expand_dims(self.y, -1),
                         max_outputs=1)
        tf.summary.image("{}/mel_hat".format(mode),
                         tf.expand_dims(self.y_hat, -1),
                         max_outputs=1)
        tf.summary.image("{}/mag_gt".format(mode),
                         tf.expand_dims(self.z, -1),
                         max_outputs=1)
        tf.summary.image("{}/mag_hat".format(mode),
                         tf.expand_dims(self.z_hat, -1),
                         max_outputs=1)

        tf.summary.audio("{}/sample".format(mode),
                         tf.expand_dims(self.audio, 0), hp.sr)
        self.merged = tf.summary.merge_all()
def train(logdir, hparams):
    """Train net1, writing summaries and periodic checkpoints to `logdir`.

    Args:
        logdir: Directory the model is loaded from and where summaries and
            checkpoints are written.
        hparams: Hyperparameter object; `hparams.Train1` supplies `lr`,
            `num_epochs` and `save_per_epoch`.
    """
    model = Model(mode="train1", hparams=hparams)

    # Loss
    loss_op = model.loss_net1()

    # Accuracy
    acc_op = model.acc_net1()

    # Training Scheme
    global_step = tf.Variable(0, name='global_step', trainable=False)

    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.Train1.lr)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net1')
        train_op = optimizer.minimize(loss_op, global_step=global_step, var_list=var_list)

    # Summary
    tf.summary.scalar('net1/train/loss', loss_op)
    tf.summary.scalar('net1/train/acc', acc_op)
    summ_op = tf.summary.merge_all()

    session_conf = tf.ConfigProto()
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9

    # Training
    with tf.Session(config=session_conf) as sess:
        # Load trained model
        sess.run(tf.global_variables_initializer())
        model.load(sess, 'train1', logdir=logdir)

        writer = tf.summary.FileWriter(logdir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Most recent feed dict; the per-epoch summary run reuses the last
        # training batch.
        feed_dict = None
        try:
            for epoch in range(1, hparams.Train1.num_epochs + 1):
                for _ in range(model.num_batch):
                    mfcc, ppg = get_batch(model.mode, model.batch_size)
                    feed_dict = {model.x_mfcc: mfcc, model.y_ppgs: ppg}
                    sess.run(train_op, feed_dict=feed_dict)

                # Write checkpoint files at every epoch
                summ, gs = sess.run([summ_op, global_step], feed_dict=feed_dict)

                if epoch % hparams.Train1.save_per_epoch == 0:
                    tf.train.Saver().save(sess, '{}/epoch_{}_step_{}'.format(logdir, epoch, gs))

                    # Write eval accuracy, in a separate graph
                    with tf.Graph().as_default():
                        eval1.eval(logdir=logdir, hparams=hparams)

                writer.add_summary(summ, global_step=gs)
        finally:
            # Release the writer and the queue-runner threads even if a step raises.
            writer.close()
            coord.request_stop()
            coord.join(threads)
import logging

logging.basicConfig(level=logging.INFO)

# NOTE(review): the visible GPU is hard-coded and overrides any value
# already present in the environment -- confirm this is intended.
os.environ['CUDA_VISIBLE_DEVICES'] = "5"

# Parse the hyperparameters from the command line and store them in the log directory.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)

# Build the training (shuffled) and evaluation (unshuffled) batches.
logging.info("# Prepare train/eval batches")
train_batches, num_train_batches, num_train_samples = get_batch(
    hp.train1,
    hp.train2,
    hp.maxlen1,
    hp.maxlen2,
    hp.vocab,
    hp.batch_size,
    shuffle=True,
)
eval_batches, num_eval_batches, num_eval_samples = get_batch(
    hp.eval1,
    hp.eval2,
    100000,
    100000,
    hp.vocab,
    hp.batch_size,
    shuffle=False,
)

# create a iterator of the correct shape and type
# NOTE(review): `iter` shadows the builtin; the name is kept because code
# outside this section may refer to it.
iter = tf.data.Iterator.from_structure(
    train_batches.output_types,
    train_batches.output_shapes,
)
xs, ys = iter.get_next()
def __init__(self, is_training=True):
    """Build the model graph.

    When ``is_training`` is true the inputs come from ``get_batch()`` and
    the losses, logging tensors, train op and summaries are added; otherwise
    placeholders are created for ``x`` and ``y`` and only the networks are
    built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(self.decoder_inputs,
                                    self.memory,
                                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(self.outputs1, is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: zero the loss where the target is exactly 0.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over the first
            self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1), 0)

            self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1), 0)

            ## images: built from the first item of the batch only
            self.expected1_i = tf.expand_dims(tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

            self.expected2_i = tf.expand_dims(tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            # The tag used to contain a space ('gotten values2'), which is
            # not a legal summary name and did not match its siblings.
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i*255)
            tf.summary.image("gotten_values1", self.got1_i*255)
            tf.summary.image("expected_values2", self.expected2_i*255)
            tf.summary.image("gotten_values2", self.got2_i*255)

            self.merged = tf.summary.merge_all()