コード例 #1
0
    def __init__(self, is_training=True):
        """Build the complete model graph on a private ``tf.Graph``.

        Args:
            is_training: Forwarded to ``get_batch``. When true, the summary
                ops and the global-variable initializer are created as well.
        """
        # Build the computational graph when initializing
        self.is_training = is_training
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Scalar placeholder that evaluates to 0.0 unless a value is fed.
            self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")
            # Non-trainable step counter.
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            # get_batch returns the input tensors plus the number of batches.
            self.data, self.num_batch = get_batch(is_training=is_training)
            # The input is a 7-tuple; the trailing-underscore length tensors
            # are squeezed below before use.
            (self.passage_w, self.question_w, self.passage_c, self.question_c,
             self.passage_w_len_, self.question_w_len_,
             self.indices) = self.data

            # Drop size-1 dimensions from the length tensors.
            self.passage_len = tf.squeeze(self.passage_w_len_)
            self.question_len = tf.squeeze(self.question_w_len_)

            # Model stages, built in this order.
            self.encode_ids()
            self.embedding_encoder()
            self.context_to_query()
            self.model_encoder()
            self.output_layer()

            # The loss is built in both modes; summaries and the initializer
            # only when training.
            self.loss_function()
            if is_training:
                self.summary()
                self.init_op = tf.global_variables_initializer()
            total_params()
コード例 #2
0
ファイル: train2.py プロジェクト: QianQQ/Voice-Conversion
def train(logdir1='logdir/default/train1', logdir2='logdir/default/train2', queue=True):
    """Train the second network and periodically checkpoint, evaluate and convert.

    Args:
        logdir1: Log directory passed to ``model.load`` as ``logdir``.
        logdir2: Log directory passed to ``model.load`` as ``logdir2``; also
            where summaries and checkpoints of this run are written.
        queue: If true, batches come from the graph's input queue; otherwise
            each step calls ``get_batch`` and feeds the placeholders.
    """
    model = Model(mode="train2", batch_size=hp.Train2.batch_size, queue=queue)

    # Loss
    loss_op = model.loss_net2()

    # Training Scheme
    global_step = tf.Variable(0, name='global_step', trainable=False)

    optimizer = tf.train.AdamOptimizer(learning_rate=hp.Train2.lr)
    # Run the collected UPDATE_OPS together with every training step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        # Only variables under the 'net/net2' scope are optimized.
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net2')
        train_op = optimizer.minimize(loss_op, global_step=global_step, var_list=var_list)

    # Summary
    summ_op = summaries(loss_op)

    # Build the saver once. Constructing a Saver adds save/restore ops to the
    # graph, so creating one per checkpoint makes the graph grow during
    # training. max_to_keep=None keeps every checkpoint on disk, as the
    # previous one-Saver-per-save code effectively did.
    saver = tf.train.Saver(max_to_keep=None)

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
            per_process_gpu_memory_fraction=0.6,
        ),
    )
    # Training
    with tf.Session(config=session_conf) as sess:
        # Initialize everything, then let the model load its trained weights.
        sess.run(tf.global_variables_initializer())
        model.load(sess, mode='train2', logdir=logdir1, logdir2=logdir2)

        writer = tf.summary.FileWriter(logdir2, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            for epoch in range(1, hp.Train2.num_epochs + 1):
                for step in tqdm(range(model.num_batch), total=model.num_batch, ncols=70, leave=False, unit='b'):
                    if queue:
                        sess.run(train_op)
                    else:
                        mfcc, spec, mel = get_batch(model.mode, model.batch_size)
                        sess.run(train_op, feed_dict={model.x_mfcc: mfcc, model.y_spec: spec, model.y_mel: mel})

                # Fetch the summary and the current step once per epoch.
                summ, gs = sess.run([summ_op, global_step])

                # Write checkpoint files every `save_per_epoch` epochs.
                if epoch % hp.Train2.save_per_epoch == 0:
                    saver.save(sess, '{}/epoch_{}_step_{}'.format(logdir2, epoch, gs))

                    # Eval at every n epochs (in a fresh default graph)
                    with tf.Graph().as_default():
                        eval2.eval(logdir2, queue=False)

                    # Convert at every n epochs (in a fresh default graph)
                    with tf.Graph().as_default():
                        convert.convert(logdir2, queue=False)

                writer.add_summary(summ, global_step=gs)
        finally:
            # Release the writer and the queue-runner threads even when a
            # training step raises.
            writer.close()
            coord.request_stop()
            coord.join(threads)
コード例 #3
0
ファイル: run.py プロジェクト: xzk-seu/xzk_thesis
def train():
    """Run one training pass over ``train_data`` and log progress periodically.

    Works entirely on module-level state (``model``, ``optimizer``,
    ``criterion``, ``scheduler``, ``train_data``, ``bptt``, ``ntokens``,
    ``device``, ``epoch``). Every 200 batches (except batch 0) it prints the
    averaged loss, perplexity and time per batch, then resets the window.
    """
    log_interval = 200
    model.train()  # Turn on the train mode
    running_loss = 0.
    window_start = time.time()
    mask = model.generate_square_subsequent_mask(bptt).to(device)
    offsets = range(0, train_data.size(0) - 1, bptt)
    for batch_idx, offset in tqdm(enumerate(offsets)):
        inputs, targets = get_batch(train_data, offset)
        optimizer.zero_grad()
        seq_len = inputs.size(0)
        if seq_len != bptt:
            # A batch of different length needs a mask of matching size.
            mask = model.generate_square_subsequent_mask(seq_len).to(device)
        logits = model(inputs, mask)
        loss = criterion(logits.view(-1, ntokens), targets)
        loss.backward()
        # Clip the gradient norm to 0.5 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        running_loss += loss.item()
        if batch_idx == 0 or batch_idx % log_interval != 0:
            continue

        cur_loss = running_loss / log_interval
        elapsed = time.time() - window_start
        print('| epoch {:3d} | {:5d}/{:5d} batches | '
              'lr {:02.2f} | ms/batch {:5.2f} | '
              'loss {:5.2f} | ppl {:8.2f}'.format(
                  epoch, batch_idx,
                  len(train_data) // bptt,
                  scheduler.get_lr()[0], elapsed * 1000 / log_interval,
                  cur_loss, math.exp(cur_loss)))
        running_loss = 0
        window_start = time.time()
コード例 #4
0
ファイル: model.py プロジェクト: jasontruong/R-net
	def __init__(self,is_training = True):
		"""Build the model graph on a private ``tf.Graph``.

		Args:
			is_training: Forwarded to ``get_batch``. When true, the loss,
				summary ops and variable initializer are built; otherwise
				``self.outputs()`` is called instead.
		"""
		# Build the computational graph when initializing
		self.is_training = is_training
		self.graph = tf.Graph()
		with self.graph.as_default():
			# Non-trainable step counter.
			self.global_step = tf.Variable(0, name='global_step', trainable=False)
			# get_batch returns the input tensors plus the number of batches.
			self.data, self.num_batch = get_batch(is_training = is_training)
			# The input is a 9-tuple of word/char tensors, their lengths and indices.
			(self.passage_w,
			self.question_w,
			self.passage_c,
			self.question_c,
			self.passage_w_len_,
			self.question_w_len_,
			self.passage_c_len,
			self.question_c_len,
			self.indices) = self.data

			# Drop size-1 dimensions from the word-length tensors.
			self.passage_w_len = tf.squeeze(self.passage_w_len_)
			self.question_w_len = tf.squeeze(self.question_w_len_)

			# Model stages, built in this order; attention parameters are
			# created between the encoder and the attention layers.
			self.encode_ids()
			self.params = get_attn_params(Params.attn_size, initializer = tf.contrib.layers.xavier_initializer)
			self.attention_match_rnn()
			self.bidirectional_readout()
			self.pointer_network()

			if is_training:
				self.loss_function()
				self.summary()
				self.init_op = tf.global_variables_initializer()
			else:
				self.outputs()
			total_params()
コード例 #5
0
    def __init__(self, is_training=True):
        """Build the encoder/decoder graph on a private ``tf.Graph``.

        Args:
            is_training: When true, inputs come from ``get_batch`` and the
                loss, optimizer and summaries are built. When false, ``x``
                and ``y`` are int32 placeholders of shape
                ``(None, hp.max_len)`` and only the inference ops exist.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(
                    None,
                    hp.max_len,
                ))
                self.y = tf.placeholder(tf.int32, shape=(
                    None,
                    hp.max_len,
                ))

            # Character Embedding for x
            self.enc = embed(self.x,
                             len(roma2idx),
                             hp.embed_size,
                             scope="emb_x")

            # Encoder. Pass the real mode flag: it used to be hard-coded to
            # True, which left the encoder in training mode in the evaluation
            # graph while the decoder followed `is_training`.
            self.memory = encode(self.enc, is_training=is_training)

            # Character Embedding for decoder_inputs
            self.decoder_inputs = shift_by_one(self.y)
            self.dec = embed(self.decoder_inputs,
                             len(surf2idx),
                             hp.embed_size,
                             scope="emb_decoder_inputs")

            # Decoder. The outputs are used as logits below; presumably shaped
            # (N, T, len(surf2idx)) -- TODO confirm against `decode`.
            self.outputs = decode(
                self.dec, self.memory, len(surf2idx),
                is_training=is_training)
            # The small epsilon keeps log() finite.
            self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
            self.preds = tf.arg_max(self.outputs, dimension=-1)

            if is_training:
                self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y, logits=self.outputs)
                # Positions whose label id is 0 do not contribute to the loss.
                self.istarget = tf.to_float(
                    tf.not_equal(self.y, tf.zeros_like(self.y)))  # masking
                # Average over unmasked positions; the epsilon guards an
                # all-masked batch.
                self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (
                    tf.reduce_sum(self.istarget) + 1e-5)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
コード例 #6
0
    def __init__(self, is_training=True):
        """Build the encoder and two-stage decoder graph on a private ``tf.Graph``.

        Args:
            is_training: When true, ``x``, ``y`` and ``z`` come from
                ``get_batch`` and the losses, optimizer and summaries are
                built. When false, ``x`` and ``y`` are placeholders and ``z``
                is not defined.
        """
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(self.y)
            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs, self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                # Loss: outputs1 is compared with y, outputs2 with z.
                if hp.loss_type == "l1":  # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else:  # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)

                # Target masking: zero the loss wherever the target is exactly 0.
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

                # The total loss is the sum of the two per-stage means.
                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
コード例 #7
0
ファイル: train.py プロジェクト: ymsBFSU/cws_biLSTM_crf
def evaluate(data, sess, model, epoch=None):
    """Decode ``data`` with Viterbi, write the predictions to a file and score them.

    For every sentence three lines are written to
    ``hp.result_path + 'results_epoch_' + <epoch or 'test'>``: the sentence,
    its reference tags and the predicted tags. The per-token
    ``[char, gold_tag, predicted_tag]`` triples are passed to ``conlleval``
    and the result is printed.

    Args:
        data: Iterable of ``(sent, tag)`` pairs; also passed to ``get_batch``.
        sess: Session used to run the graph.
        model: Provides the ``sent_input``, ``label`` and ``sequence_length``
            placeholders.
        epoch: Used in the output file name; ``None`` selects ``'test'``.
    """
    labels_pred = []
    label2tag = {label: tag for tag, label in hp.tag2label.items()}
    for seqs, labels, seqs_len in get_batch(data,
                                            hp.batch_size,
                                            hp.vocab_path,
                                            hp.tag2label,
                                            shuffle=False):
        # NOTE(review): `logits` and `transition_params` are not defined in
        # this function and must resolve from module scope -- confirm they
        # are this model's tensors.
        _logits, _transition_params = sess.run(
            [logits, transition_params],
            feed_dict={
                model.sent_input: seqs,
                model.label: labels,
                model.sequence_length: seqs_len
            })
        for logit, seq_len in zip(_logits, seqs_len):
            # Decode only the first `seq_len` positions of each sequence.
            viterbi_seq, _ = tf.contrib.crf.viterbi_decode(
                logit[:seq_len], _transition_params)
            labels_pred.append(viterbi_seq)

    model_pred = []
    epoch_num = str(epoch) if epoch is not None else 'test'
    # exist_ok avoids the race between checking for the directory and
    # creating it, and also creates missing parent directories.
    os.makedirs(hp.result_path, exist_ok=True)
    with open(hp.result_path + 'results_epoch_' + epoch_num,
              'w',
              encoding='utf-8') as fw:
        for label_pred, (sent, tag) in zip(labels_pred, data):
            fw.write(''.join(sent) + '\n')
            fw.write(''.join(tag) + '\n')
            tag_pred = [label2tag[i] for i in label_pred]
            fw.write(''.join(tag_pred) + '\n')
            if len(label_pred) != len(sent):
                # Report the mismatch, then pair only the positions that
                # exist in all three sequences (zip stops at the shortest)
                # instead of raising IndexError.
                print(sent)
                print(len(label_pred))
                print(len(sent))
            model_pred.append(
                [[char, gold, pred]
                 for char, gold, pred in zip(sent, tag, tag_pred)])
    result = conlleval(model_pred)
    print(result)
コード例 #8
0
ファイル: run.py プロジェクト: xzk-seu/xzk_thesis
def evaluate(eval_model, data_source):
    """Return the length-weighted average loss of ``eval_model`` on ``data_source``.

    Args:
        eval_model: Model to evaluate; it is switched to eval mode and must
            provide ``generate_square_subsequent_mask``.
        data_source: Tensor that is walked in steps of ``bptt`` along dim 0.

    Returns:
        The summed, length-weighted loss divided by ``len(data_source) - 1``.
    """
    eval_model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    # Build masks from the model being evaluated. This used to reach for the
    # module-level `model`, which tied the function to that global even when
    # a different model was passed in.
    src_mask = eval_model.generate_square_subsequent_mask(bptt).to(device)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            if data.size(0) != bptt:
                # A batch of different length needs a mask of matching size.
                src_mask = eval_model.generate_square_subsequent_mask(
                    data.size(0)).to(device)
            output = eval_model(data, src_mask)
            output_flat = output.view(-1, ntokens)
            # Weight each batch loss by its sequence length.
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
コード例 #9
0
def eval(logdir='logdir/default/train1', queue=False):
    """Score one batch with the first network and log accuracy and loss.

    Args:
        logdir: Directory the weights are loaded from and the summary is
            written to.
        queue: If true, the graph's input queue supplies the batch; otherwise
            a batch from ``get_batch`` is fed through the placeholders.
    """
    # Load graph
    net = Model(mode="test1", batch_size=hp.Test1.batch_size, queue=queue)

    # Accuracy, loss and summary ops, fetched together below.
    acc_op = net.acc_net1()
    loss_op = net.loss_net1()
    summ_op = summaries(acc_op, loss_op)
    fetches = [summ_op, acc_op, loss_op]

    # One CPU device and no GPU devices.
    cpu_conf = tf.ConfigProto(
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 0},
    )
    with tf.Session(config=cpu_conf) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        writer = tf.summary.FileWriter(logdir, sess.graph)

        # Load trained model
        sess.run(tf.global_variables_initializer())
        net.load(sess, 'train1', logdir=logdir)

        # Without the queue, feed an explicit batch; otherwise feed nothing.
        feed = None
        if not queue:
            mfcc, ppg = get_batch(net.mode, net.batch_size)
            feed = {net.x_mfcc: mfcc, net.y_ppgs: ppg}
        summ, acc, loss = sess.run(fetches, feed_dict=feed)

        writer.add_summary(summ)

        print("acc:", acc)
        print("loss:", loss)
        print('\n')

        writer.close()

        coord.request_stop()
        coord.join(threads)
コード例 #10
0
def eval(logdir, hparams):
    """Score one fed batch with the first network and log accuracy and loss.

    Args:
        logdir: Directory the weights are loaded from and the summary is
            written to.
        hparams: Hyper-parameters forwarded to ``Model``.
    """
    # Load graph
    net = Model(mode="test1", hparams=hparams)

    # Accuracy, loss and summary ops, fetched together below.
    acc_op = net.acc_net1()
    loss_op = net.loss_net1()
    summ_op = summaries(acc_op, loss_op)
    fetches = [summ_op, acc_op, loss_op]

    # Cap this process at 90% of GPU memory.
    gpu_conf = tf.ConfigProto()
    gpu_conf.gpu_options.per_process_gpu_memory_fraction = 0.9

    with tf.Session(config=gpu_conf) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        writer = tf.summary.FileWriter(logdir, sess.graph)

        # Load trained model
        sess.run(tf.global_variables_initializer())
        net.load(sess, 'train1', logdir=logdir)

        mfcc, ppg = get_batch(net.mode, net.batch_size)
        feed = {net.x_mfcc: mfcc, net.y_ppgs: ppg}
        summ, acc, loss = sess.run(fetches, feed_dict=feed)

        writer.add_summary(summ)

        print("acc:", acc)
        print("loss:", loss)
        print('\n')

        writer.close()

        coord.request_stop()
        coord.join(threads)
コード例 #11
0
ファイル: train.py プロジェクト: zxhaijm/tacotron_asr
    def __init__(self, is_training=True):
        """Build the encoder/decoder graph on a private ``tf.Graph``.

        Args:
            is_training: When true, ``x`` and ``y`` come from ``get_batch``
                and the loss, optimizer and summaries are built. When false,
                they are placeholders and only the inference ops exist.
        """
        self.graph = tf.Graph()
        self.is_training = is_training
        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))
                self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len))

            # Decoder inputs are the embedded, shifted targets.
            self.decoder_inputs = embed(shift_by_one(self.y), len(char2idx),
                                        hp.embed_size)  # (N, T', E)

            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(
                    self.x, is_training=is_training)  # (N, T, hp.n_mels*hp.r)

                # Decoder
                self.outputs = decode(self.decoder_inputs,
                                      self.memory,
                                      is_training=is_training)  # (N, T', E)
                # The small epsilon keeps log() finite.
                self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
                self.preds = tf.arg_max(self.outputs, dimension=-1)

            if is_training:
                # Loss
                self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y, logits=self.outputs)

                # Target masking: positions whose label id is 0 are excluded;
                # the epsilon guards an all-masked batch.
                self.istarget = tf.to_float(tf.not_equal(self.y, 0))
                self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (
                    tf.reduce_sum(self.istarget) + 1e-7)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
コード例 #12
0
def test(hp):
    """Restore a checkpoint, decode the test set, write hypotheses and score BLEU.

    Args:
        hp: Hyper-parameter namespace. ``hp.ckpt`` is tried as a checkpoint
            directory first and used as a literal checkpoint path when no
            latest checkpoint is found there.
    """
    # Loading hyper params
    load_hparams(hp, hp.ckpt)

    logging.info("# Prepare test batches")
    test_batches, num_test_batches, num_test_samples = get_batch(
        hp.test1, hp.test1, 100000, 100000, hp.vocab, hp.test_batch_size,
        shuffle=False)
    iterator = tf.data.Iterator.from_structure(test_batches.output_types,
                                               test_batches.output_shapes)
    xs, ys = iterator.get_next()
    test_init_op = iterator.make_initializer(test_batches)

    logging.info("# Load model")
    model = Transformer(hp)

    logging.info("# Session")
    with tf.Session() as sess:
        # Fall back to hp.ckpt itself when it holds no "latest" checkpoint.
        ckpt = tf.train.latest_checkpoint(hp.ckpt) or hp.ckpt
        saver = tf.train.Saver()
        saver.restore(sess, ckpt)

        y_hat, mean_loss = model.eval(sess, test_init_op, xs, ys,
                                      num_test_batches)

        logging.info("# get hypotheses")
        hypotheses = get_hypotheses(num_test_samples, y_hat, model.idx2token)

        logging.info("# write results")
        # The output file is named after the checkpoint.
        translation = os.path.join(hp.testdir, os.path.basename(ckpt))
        if not os.path.exists(hp.testdir):
            os.makedirs(hp.testdir)
        with open(translation, 'w', encoding="utf-8") as fout:
            fout.write("\n".join(hypotheses))

        logging.info("# calc bleu score and append it to translation")
        calc_bleu_nltk(hp.test2, translation)
コード例 #13
0
ファイル: templates.py プロジェクト: Traeyee/DickLearning
def export_pb_template(class_model):
    """Restore the latest checkpoint of a model and export it as a frozen ``.pb``.

    Hyper-parameters are parsed from the command line and completed from
    ``hp.logdir``. The frozen graph is written to ``./model/<pb_name>.pb``
    and the operation specs to ``./model/<pb_name>.ops``.

    Args:
        class_model: Model class; called with a ``Context`` and expected to
            provide ``eval`` and ``get_inference_op_name``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Hide all GPUs from TensorFlow for the export.
    os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

    logging.info("# hparams")
    hparams = Hparams()
    parser = hparams.parser
    hp = parser.parse_args()
    load_hparams(hp, hp.logdir)
    context = Context(hp)

    params = {"maxlens": 0x3f3f}
    eval_batches, num_eval_batches, num_eval_samples = get_batch(
        fpath=hp.eval_data,
        task_type=hp.task_type,
        input_indices=context.input_indices,
        vocabs=context.vocabs,
        context=params,
        batch_size=hp.batch_size,
        shuffle=True)

    # create a iterator of the correct shape and type
    iterr = tf.data.Iterator.from_structure(eval_batches.output_types,
                                            eval_batches.output_shapes)
    inputs_and_target = iterr.get_next()

    model = class_model(context)
    # Building the eval graph creates the inference op; the last element of
    # the batch is passed separately from the rest.
    _ = model.eval(inputs_and_target[:-1], inputs_and_target[-1])
    inference_name = model.get_inference_op_name()
    logging.info("inference_node_name:%s", inference_name)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(hp.logdir)
        saver.restore(sess, ckpt)
        # Keep only the part before the first ":". partition() returns the
        # whole name when there is no ":", whereas slicing with find()'s -1
        # result silently dropped the last character.
        inference_node_name = inference_name.partition(":")[0]
        graph_def = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=[inference_node_name])
        tf.train.write_graph(graph_def,
                             './model',
                             '%s.pb' % hp.pb_name,
                             as_text=False)
        save_operation_specs(os.path.join("./model", '%s.ops' % hp.pb_name))
コード例 #14
0
def eval(model, f, ids2tokens, idx2phr):
    """Score one evaluation batch, print a random sample and save the weights.

    Args:
        model: Network; called as ``model(x)``, with the second of its three
            outputs taken as the candidate predictions.
        f: Path prefix of the saved state dict (``{f}_ACC{acc}.pt``).
        ids2tokens: Function mapping a list of token ids to tokens.
        idx2phr: Mapping from class index to phrase.
    """
    model.eval()

    with torch.no_grad():
        x, y = get_batch(hp.max_span, hp.batch_size, hp.n_classes, False)
        x = x.cuda()

        _, y_hat, _ = model(x)  # y_hat: (N, n_candidates)

        # Convert everything to plain Python lists.
        inputs = x.cpu().numpy().tolist()
        golds = y.cpu().numpy().tolist()
        candidates = y_hat.cpu().numpy().tolist()

        # monitoring: show one randomly chosen sample
        pointer = random.randint(0, len(inputs) - 1)
        xx = inputs[pointer]
        yy = golds[pointer]
        yy_hat = candidates[pointer]

        tokens = ids2tokens(xx)  # this is a function.
        detokenized = " ".join(tokens).replace(" ##", "")  # bert detokenization
        ctx = detokenized.split("[PAD]")[0]
        gt = idx2phr[yy]  # this is a dict.
        ht = " | ".join(idx2phr[each] for each in yy_hat)

        print(f"context: {ctx}")
        print(f"ground truth: {gt}")
        print(f"predictions: {ht}")

    # calc acc.: a sample counts as correct when its gold class is among
    # the predicted candidates.
    n_correct = sum(1 for gold, topk in zip(golds, candidates) if gold in topk)
    acc = n_correct / len(golds)
    print(f"acc@{hp.n_candidates}: %.2f" % acc)

    acc = str(round(acc, 2))

    torch.save(model.state_dict(), f"{f}_ACC{acc}.pt")
コード例 #15
0
ファイル: eval1.py プロジェクト: QianQQ/Voice-Conversion
def eval(logdir='logdir/default/train1', queue=False):
    """Evaluate the first network on a single batch and report accuracy/loss.

    Args:
        logdir: Directory the weights are loaded from and the summary is
            written to.
        queue: If true, the batch comes from the graph's input queue;
            otherwise ``get_batch`` supplies it and it is fed explicitly.
    """
    # Load graph
    net = Model(mode="test1", batch_size=hp.Test1.batch_size, queue=queue)

    accuracy = net.acc_net1()
    loss_tensor = net.loss_net1()
    summary = summaries(accuracy, loss_tensor)

    # One CPU device and no GPU devices.
    config = tf.ConfigProto(
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 0},
    )
    with tf.Session(config=config) as sess:
        coordinator = tf.train.Coordinator()
        runners = tf.train.start_queue_runners(coord=coordinator)

        writer = tf.summary.FileWriter(logdir, sess.graph)

        # Load trained model
        sess.run(tf.global_variables_initializer())
        net.load(sess, 'train1', logdir=logdir)

        # Only the non-queue path needs a feed dict.
        run_kwargs = {}
        if not queue:
            mfcc, ppg = get_batch(net.mode, net.batch_size)
            run_kwargs['feed_dict'] = {net.x_mfcc: mfcc, net.y_ppgs: ppg}
        summ, acc, loss = sess.run([summary, accuracy, loss_tensor],
                                   **run_kwargs)

        writer.add_summary(summ)

        print("acc:", acc)
        print("loss:", loss)
        print('\n')

        writer.close()

        coordinator.request_stop()
        coordinator.join(runners)
コード例 #16
0
def train_and_eval(model, optimizer, criterion, ids2tokens, idx2phr):
    """Train for ``hp.n_train_steps + 1`` steps, evaluating every 500 steps.

    Args:
        model: Network; called as ``model(x)``, with the first of its three
            outputs taken as the logits.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss function called as ``criterion(logits, y)``.
        ids2tokens: Forwarded to ``eval``.
        idx2phr: Forwarded to ``eval``.
    """
    model.train()
    for step in tqdm(range(hp.n_train_steps + 1)):
        inputs, labels = get_batch(hp.max_span, hp.batch_size, hp.n_classes, True)
        inputs = inputs.cuda()
        labels = labels.cuda()

        optimizer.zero_grad()

        logits, _, _ = model(inputs)  # logits: (N, classes)

        loss = criterion(logits, labels)
        loss.backward()

        optimizer.step()

        # Evaluate only on non-zero multiples of 500.
        if not step or step % 500 != 0:
            continue

        eval(model, f'{hp.logdir}/{step}', ids2tokens, idx2phr)
        print(f"step: {step}, loss: {loss.item()}")
        # eval() switched the model to eval mode; switch back.
        model.train()
コード例 #17
0
    def __init__(self,is_training = True, vocab_size = 100000, demo = False):
        """Build the model graph on a private ``tf.Graph``.

        Args:
            is_training: Forwarded to ``get_batch``. When true, the loss,
                summaries and variable initializer are built.
            vocab_size: Stored on the instance as ``self.vocab_size``.
            demo: When true, inputs are set up by ``self.demo_inputs()``
                instead of ``get_batch``.
        """
        # Build the computational graph when initializing
        self.is_training = is_training
        self.vocab_size = vocab_size
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Scalar placeholder that evaluates to 0.0 unless a value is fed.
            self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            if demo:
                self.demo_inputs()
            else:
                # get_batch returns a 5-tuple of input tensors plus the number of batches.
                self.data, self.num_batch = get_batch(is_training = is_training)
                (self.passage_w,
                self.question_w,
                self.passage_c,
                self.question_c,
                self.indices) = self.data

            # Boolean masks that are False where the word id equals 1
            # (presumably the padding id -- confirm against the data pipeline).
            self.passage_mask = tf.cast(1 - tf.cast(tf.equal(self.passage_w,1), tf.float32), tf.bool)
            self.question_mask = tf.cast(1 - tf.cast(tf.equal(self.question_w,1), tf.float32), tf.bool)
            # Lengths are the number of True mask entries along axis 1.
            self.passage_len = tf.reduce_sum(tf.cast(self.passage_mask, tf.int32), axis=1)
            self.question_len = tf.reduce_sum(tf.cast(self.question_mask, tf.int32), axis=1)

            # Model stages, built in this order.
            self.encode_ids()
            self.embedding_encoder()
            self.context_to_query()
            self.model_encoder()
            self.output_layer()

            if Params.decay:
                self.apply_ema()

            if is_training:
                self.loss_function()
                self.summary()
                self.init_op = tf.global_variables_initializer()

            total_params()
コード例 #18
0
ファイル: inference.py プロジェクト: qsong4/FI
    def test_file(self):
        """Return the mean per-batch accuracy over ``self.hp.test_file``.

        A fresh batch is pulled from the dataset iterator for every step.
        Previously a single batch was fetched once and re-scored
        ``num_test_batches + 1`` times while the sum was divided by
        ``num_test_batches``, so the result could exceed 1.0 and never
        covered the whole test set.

        Returns:
            The average of the per-batch accuracies, or ``0.0`` when the
            dataset yields no batch.
        """
        test_file = self.hp.test_file
        test_batches, num_test_batches, _ = get_batch(
            test_file, self.hp.maxlen, self.hp.vocab, self.hp.batch_size)
        iterator = tf.data.Iterator.from_structure(test_batches.output_types,
                                                   test_batches.output_shapes)
        data_element = iterator.get_next()
        test_init_op = iterator.make_initializer(test_batches)

        self.sess.run(test_init_op)
        total_acc = 0.0
        steps_run = 0
        for _ in tqdm(range(num_test_batches)):
            try:
                x, y, x_len, y_len, labels = self.sess.run(data_element)
            except tf.errors.OutOfRangeError:
                # The dataset ran out before num_test_batches was reached.
                break
            feed_dict = self.m.create_feed_dict(x, y, x_len, y_len, labels)
            total_acc += self.sess.run(self.acc_op, feed_dict=feed_dict)
            steps_run += 1

        # Divide by the number of batches actually scored.
        if steps_run == 0:
            return 0.0
        return total_acc / steps_run
コード例 #19
0
def train(config):
    """Run the training loop for a ``ConvSeq2Seq`` model.

    Logs loss/accuracy on every step, writes summaries every
    ``config.log_period`` steps, checkpoints every ``config.save_period``
    steps, and writes a final checkpoint if the last step was not already
    saved.

    Args:
      config: Configuration object; this function reads ``num_epoch``,
        ``log_period`` and ``save_period`` from it and passes it on to the
        model, trainer and graph handler.
    """
    model = ConvSeq2Seq(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config)
    sess = tf.Session()
    graph_handler.initialize(sess)

    # Read the starting step up front so the check after the loop is well
    # defined even when get_batch yields no batches at all.
    global_step = sess.run(model.global_step)

    for batch in tqdm(get_batch(num_epoch=config.num_epoch)):
        # +1 because the step counter is read before run_step advances it.
        global_step = sess.run(model.global_step) + 1
        loss, acc, summary = trainer.run_step(sess, batch)
        # Parenthesised form is valid on both Python 2 and Python 3.
        print("global_step: %d,    loss: %f,     acc: %f" % (global_step, loss, acc))

        if global_step % config.log_period == 0:
            graph_handler.add_summary(summary, global_step)

        if global_step % config.save_period == 0:
            graph_handler.save_model(sess, global_step)

    # Final checkpoint, unless the last step was already saved in the loop.
    if global_step % config.save_period != 0:
        graph_handler.save_model(sess)
コード例 #20
0
    def __init__(self, is_training=True):
        """Build the encoder/decoder graph inside a private ``tf.Graph``.

        Args:
          is_training: When true, the vocabulary is (re)built and stored,
            inputs come from ``get_batch()``, and losses, the optimizer and
            summaries are added. When false, ``self.x`` and ``self.y`` are
            placeholders and only the forward network is built.
        """
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                # Build vocab
                _, idx2char = learn_vocab()
                store_vocab(idx2char)

                self.x, self.y, self.z, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs, self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                # Loss (element-wise; reduced to scalars further down)
                if hp.loss_type == "l1":  # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else:  # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)

                # Target masking: zero the loss wherever the target is
                # exactly 0. The mean below still divides by the full
                # element count, masked positions included.
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Logging
                ## histograms: mean over the last axis, then over the batch
                self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1),
                                                  0)
                self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1),
                                             0)

                self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1),
                                                  0)
                self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1),
                                             0)

                ## images: first sample only, last axis averaged
                self.expected1_i = tf.expand_dims(
                    tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
                self.got1_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

                self.expected2_i = tf.expand_dims(
                    tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
                self.got2_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)

                tf.summary.histogram('expected_values1', self.expected1_h)
                tf.summary.histogram('gotten_values1', self.got1_h)
                tf.summary.histogram('expected_values2', self.expected2_h)
                # Tag uses an underscore like its siblings; a space is not a
                # legal summary-name character.
                tf.summary.histogram('gotten_values2', self.got2_h)

                tf.summary.image("expected_values1", self.expected1_i * 255)
                tf.summary.image("gotten_values1", self.got1_i * 255)
                tf.summary.image("expected_values2", self.expected2_i * 255)
                tf.summary.image("gotten_values2", self.got2_i * 255)

                self.merged = tf.summary.merge_all()
コード例 #21
0
ファイル: train.py プロジェクト: Edresson/Text2World
    def __init__(self, num=1, mode="train"):
        '''
        Build the graph for Text2World and/or WSRN.

        Args:
          num: Which network to train. 1 builds and trains Text2World; any
            other value builds and trains WSRN. Outside training both
            networks are built regardless of this value.
          mode: Either "train" or "synthesize".
        '''
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Set flag
        training = mode == "train"

        # Graph
        # Data Feeding
        ## L: Text. (B, N), int32
        ## world: World Vocoder concatenate tensor.(B, 8*T/r, num_lf0+num_mgc+num_bap) float32
        if training:
            (self.L, self.worlds, self.worlds_WSR, self.fnames,
             self.num_batch) = get_batch()
            self.prev_max_attentions = tf.ones(shape=(hp.B, ), dtype=tf.int32)
        else:  # Synthesize
            self.L = tf.placeholder(tf.int32, shape=(None, None))
            self.worlds = tf.placeholder(
                tf.float32,
                shape=(None, None,
                       hp.num_bap + hp.num_lf0 + hp.num_mgc + hp.num_vuv))
            self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, ))
        # The attention guide is built in both modes.
        self.gts = tf.convert_to_tensor(guided_attention())

        if num == 1 or (not training):
            with tf.variable_scope("Text2World"):
                # Get S or decoder inputs. (B, 8*T/r, num_lf0+num_mgc+num_bap)
                # Targets shifted right by one frame with a zero frame in front.
                self.S = tf.concat((tf.zeros_like(
                    self.worlds[:, :1, :]), self.worlds[:, :-1, :]), 1)

                # Networks
                with tf.variable_scope("TextEnc"):
                    self.K, self.V = TextEnc(self.L,
                                             training=training)  # (N, Tx, e)

                with tf.variable_scope("AudioEnc"):
                    self.Q = AudioEnc(self.S, training=training)

                with tf.variable_scope("Attention"):
                    # R: (B, T/r, 2d)
                    # alignments: (B, N, T/r)
                    # max_attentions: (B,)
                    # NOTE: `mononotic_attention` is the keyword spelling
                    # used at this call site; left as-is to match the callee.
                    self.R, self.alignments, self.max_attentions = Attention(
                        self.Q,
                        self.K,
                        self.V,
                        mononotic_attention=(not training),
                        prev_max_attentions=self.prev_max_attentions)
                with tf.variable_scope("AudioDec"):
                    self.Y_logits, self.Y = AudioDec(
                        self.R,
                        training=training)  # (B, T/r, num_lf0+num_mgc+num_bap)
        else:  # num==2 & training: WSRN is trained on the ground-truth worlds.
            with tf.variable_scope("WSRN"):
                self.Z_logits, self.Z = WSRN(self.worlds, training=training)

        if not training:
            # During inference, the predicted world values are fed.
            with tf.variable_scope("WSRN"):
                self.Z_logits, self.Z = WSRN(self.Y, training=training)

        with tf.variable_scope("gs"):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

        if training:
            if num == 1:  # Text2World
                # world MSE loss (the L1 variant is kept commented out below)
                self.loss_worlds = tf.losses.mean_squared_error(
                    self.worlds, self.Y)
                #self.loss_worlds = tf.reduce_mean(tf.abs(self.Y - self.worlds))

                # world binary divergence loss
                #self.loss_bd1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Y_logits, labels=self.worlds))

                # guided_attention loss
                # Pad the alignments with -1 and crop to (hp.max_N, hp.max_T);
                # the -1 entries mark padding and are masked out of the loss.
                self.A = tf.pad(self.alignments, [(0, 0), (0, hp.max_N),
                                                  (0, hp.max_T)],
                                mode="CONSTANT",
                                constant_values=-1.)[:, :hp.max_N, :hp.max_T]
                self.attention_masks = tf.to_float(tf.not_equal(self.A, -1))
                self.loss_att = tf.reduce_sum(
                    tf.abs(self.A * self.gts) * self.attention_masks)
                self.mask_sum = tf.reduce_sum(self.attention_masks)
                self.loss_att /= self.mask_sum

                # total loss
                self.loss = self.loss_worlds + self.loss_att  #self.loss_bd1 +

                tf.summary.scalar('train/loss_worlds', self.loss_worlds)
                #tf.summary.scalar('train/loss_bd1', self.loss_bd1)
                tf.summary.scalar('train/loss_att', self.loss_att)
                tf.summary.image(
                    'train/world_gt',
                    tf.expand_dims(tf.transpose(self.worlds[:1], [0, 2, 1]),
                                   -1))
                tf.summary.image(
                    'train/world_hat',
                    tf.expand_dims(tf.transpose(self.Y[:1], [0, 2, 1]), -1))
            else:  #WSRN
                # world MSE loss
                self.loss_WSR = tf.losses.mean_squared_error(
                    self.Z, self.worlds_WSR)

                # world binary divergence loss
                #self.loss_bd2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Z_logits, labels=self.worlds_WSR))

                # total loss
                self.loss = self.loss_WSR  #+ self.loss_bd2

                tf.summary.scalar('train/loss_world_SSRN', self.loss_WSR)
                #tf.summary.scalar('train/loss_bd2', self.loss_bd2)

            # Training Scheme
            self.lr = learning_rate_decay(hp.lr, self.global_step)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
            tf.summary.scalar("lr", self.lr)

            ## gradient clipping
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                # compute_gradients yields None for variables the loss does
                # not depend on; pass those through unclipped.
                if grad is not None:
                    grad = tf.clip_by_value(grad, -1., 1.)
                self.clipped.append((grad, var))
            # Build the update op ONCE, after every gradient is clipped.
            # Calling apply_gradients inside the loop added one redundant
            # update op to the graph per variable.
            self.train_op = self.optimizer.apply_gradients(
                self.clipped, global_step=self.global_step)

            # Summary
            self.merged = tf.summary.merge_all()
コード例 #22
0
from tqdm import tqdm
import os
import logging
from data_load import get_batch

from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu

# Script setup: parse hyperparameters, build the train/eval input pipelines,
# and construct the Transformer training ops.
logging.basicConfig(level=logging.INFO)
logging.info("# hparams")
hparams = Hparams()
hp = hparams.parse_arg()
# Persist the parsed hyperparameters under the log directory.
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")

# Training batches are shuffled; evaluation batches are not.
train_batches, train_num_batches, train_samples = get_batch(hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab, shuffle = True)

eval_batches, eval_num_batches, eval_samples = get_batch(hp.eval1, hp.eval2, hp.maxlen1, hp.maxlen2, hp.vocab, shuffle = False)

# Create one iterator from the training batches' types and shapes. It is
# initialized from either the train or the eval batches below, so the graph
# built on xs/ys can be fed from both.
# NOTE(review): `iter` shadows the Python builtin; left unchanged because
# code outside this snippet may refer to it.
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()

# Running one of these ops points the shared iterator at that dataset.
train_init_op = iter.make_initializer(train_batches)
eval_init_op = iter.make_initializer(eval_batches)


logging.info("# Load model")
m = Transformer(hp)

# Training graph: loss, update op, step counter and merged summaries.
loss, train_op, global_step, train_summaries = m.train(xs, ys)
コード例 #23
0
import logging
# Script setup: select the GPU, parse hyperparameters and build the
# train/eval batches.
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
logging.basicConfig(level=logging.INFO)

logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# Persist the parsed hyperparameters under the log directory.
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# Training batches: shuffled, using the configured hp.maxlen1 / hp.maxlen2.
train_batches, num_train_batches, num_train_samples = get_batch(
    hp.train1,
    hp.train2,
    hp.maxlen1,
    hp.maxlen2,
    hp.vocab,
    hp.train_paraphrased,
    hp.batch_size,
    shuffle=True,
    paraphrase_type=hp.paraphrase_type)
# Evaluation batches: not shuffled. The two literal 1000s sit where the
# training call passes hp.maxlen1 / hp.maxlen2.
# NOTE(review): presumably a deliberately large length cap so evaluation
# examples are not dropped -- confirm against get_batch.
eval_batches, num_eval_batches, num_eval_samples = get_batch(
    hp.eval1,
    hp.eval2,
    1000,
    1000,
    hp.vocab,
    hp.eval_paraphrased,
    hp.batch_size,
    shuffle=False,
    paraphrase_type=hp.paraphrase_type)
コード例 #24
0
    def __init__(self, mode="train"):
        """Build the graph for training, evaluation or synthesis.

        Args:
          mode: "train" reads inputs from ``get_batch()``; "eval" feeds text,
            mel, magnitude and file names through placeholders; any other
            value (synthesis) feeds only text and mel placeholders and adds
            no loss, optimizer or summaries.
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Only "train" switches the sub-networks into training behaviour.
        is_training = mode == "train"

        # Data feeding
        # x: Text. (N, Tx)
        # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
        # z: Magnitude. (N, Ty, n_fft//2+1)
        if mode == "train":
            self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
        else:
            # Evaluation and synthesis both feed text and mel targets.
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
            if mode == "eval":
                # Evaluation additionally needs what the loss and the
                # summaries consume.
                self.z = tf.placeholder(tf.float32,
                                        shape=(None, None, 1 + hp.n_fft // 2))
                self.fnames = tf.placeholder(tf.string, shape=(None,))

        # Encoder inputs: embedded text. (N, T_x, E)
        self.encoder_inputs = embed(self.x, len(hp.vocab), hp.embed_size)

        # Decoder inputs: targets shifted right by one step behind an
        # all-zero first frame. (N, Ty/r, n_mels*r)
        go_frame = tf.zeros_like(self.y[:, :1, :])
        shifted_targets = self.y[:, :-1, :]
        teacher_forced = tf.concat((go_frame, shifted_targets), 1)
        # Feed last frames only. (N, Ty/r, n_mels)
        self.decoder_inputs = teacher_forced[:, :, -hp.n_mels:]

        # Networks
        with tf.variable_scope("net"):
            # Encoder: (N, T_x, E)
            self.memory = encoder(self.encoder_inputs,
                                  is_training=is_training)

            # Decoder1: (N, T_y//r, n_mels*r)
            self.y_hat, self.alignments = decoder1(
                self.decoder_inputs, self.memory, is_training=is_training)

            # Decoder2 or postprocessing: (N, T_y//r, (1+n_fft//2)*r)
            self.z_hat = decoder2(self.y_hat, is_training=is_training)

        # Monitor: waveform reconstructed from the first predicted magnitude.
        self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

        if mode not in ("train", "eval"):
            # Synthesis needs neither a loss nor an optimizer.
            return

        # Loss: mean absolute error on mel and magnitude predictions.
        mel_error = tf.abs(self.y_hat - self.y)
        self.loss1 = tf.reduce_mean(mel_error)
        mag_error = tf.abs(self.z_hat - self.z)
        self.loss2 = tf.reduce_mean(mag_error)
        self.loss = self.loss1 + self.loss2

        # Training scheme
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

        ## Gradient clipping: each gradient is clipped to norm 5 on its own.
        self.gvs = self.optimizer.compute_gradients(self.loss)
        self.clipped = [(tf.clip_by_norm(gradient, 5.), variable)
                        for gradient, variable in self.gvs]
        self.train_op = self.optimizer.apply_gradients(
            self.clipped, global_step=self.global_step)

        # Summary
        scalar_summaries = (
            ('{}/loss1', self.loss1),
            ('{}/loss', self.loss),
            ('{}/lr', self.lr),
        )
        for tag_template, value in scalar_summaries:
            tf.summary.scalar(tag_template.format(mode), value)

        image_summaries = (
            ("{}/mel_gt", self.y),
            ("{}/mel_hat", self.y_hat),
            ("{}/mag_gt", self.z),
            ("{}/mag_hat", self.z_hat),
        )
        for tag_template, spectrogram in image_summaries:
            tf.summary.image(tag_template.format(mode),
                             tf.expand_dims(spectrogram, -1),
                             max_outputs=1)

        tf.summary.audio("{}/sample".format(mode),
                         tf.expand_dims(self.audio, 0), hp.sr)
        self.merged = tf.summary.merge_all()
コード例 #25
0
 def __init__(self, is_training=True):
     """Build the multi-GPU training graph or the single-device eval graph.

     Args:
       is_training: When true, the batch from ``get_batch()`` is split into
         ``hp.num_gpus`` shards, one network replica is built per GPU with
         shared variables, and per-replica losses and gradients are
         averaged. When false, placeholders feed a single replica.
     """
     self.graph = tf.Graph()

     with self.graph.as_default():
         if is_training:
             self.x, self.y, self.z, self.num_batch = get_batch()
             self.decoder_inputs = shift_by_one(self.y)

             # Make sure that batch size was multiplied by # gpus.
             # Now we split the mini-batch data by # gpus.
             self.x = tf.split(self.x, hp.num_gpus, 0)
             self.y = tf.split(self.y, hp.num_gpus, 0)
             self.z = tf.split(self.z, hp.num_gpus, 0)
             self.decoder_inputs = tf.split(self.decoder_inputs, hp.num_gpus, 0)

             # Sequence lengths for masking
             # NOTE(review): self.x is already a list of per-GPU shards
             # here, so these tensors appear to carry an extra leading
             # num_gpus axis -- confirm with whoever consumes them.
             self.x_lengths = tf.to_int32(tf.reduce_sum(tf.sign(tf.abs(self.x)), -1))
             self.x_masks = tf.to_float(tf.expand_dims(tf.sign(tf.abs(self.x)), -1))

             # optimizer
             self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

             self.losses, self.grads_and_vars_list = [], []
             for i in range(hp.num_gpus):
                 # reuse=False for the first replica (creates variables),
                 # True for every later one (shares them).
                 with tf.variable_scope('net', reuse=bool(i)):
                     with tf.device('/gpu:{}'.format(i)):
                         with tf.name_scope('gpu_{}'.format(i)):
                             # Encoder
                             self.memory = encode(self.x[i], is_training=is_training) # (N, T, E)

                             # Decoder
                             self.outputs1 = decode1(self.decoder_inputs[i],
                                                      self.memory,
                                                      is_training=is_training) # (N, T', hp.n_mels*hp.r)
                             self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)

                             # Loss
                             if hp.loss_type=="l1": # L1 loss
                                 self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                 self.loss2 = tf.abs(self.outputs2 - self.z[i])
                             else: # L2 loss
                                 self.loss1 = tf.squared_difference(self.outputs1, self.y[i])
                                 self.loss2 = tf.squared_difference(self.outputs2, self.z[i])

                             # Target masking
                             if hp.target_zeros_masking:
                                 self.loss1 *= tf.to_float(tf.not_equal(self.y[i], 0.))
                                 self.loss2 *= tf.to_float(tf.not_equal(self.z[i], 0.))

                             self.loss1 = tf.reduce_mean(self.loss1)
                             self.loss2 = tf.reduce_mean(self.loss2)
                             self.loss = self.loss1 + self.loss2

                             self.losses.append(self.loss)
                             self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
                             self.grads_and_vars_list.append(self.grads_and_vars)

             with tf.device('/cpu:0'):
                 # Aggregate losses, then calculate average loss.
                 self.mean_loss = tf.add_n(self.losses) / len(self.losses)

                 # Aggregate gradients, then calculate average gradients.
                 self.mean_grads_and_vars = []
                 for tower_grads_and_vars in zip(*self.grads_and_vars_list):
                     # Replicas share their variables, so the entry at this
                     # position is taken to be the same variable on every
                     # tower; read it from the first one.
                     var = tower_grads_and_vars[0][1]
                     grads = [tf.expand_dims(grad, 0)
                              for grad, _ in tower_grads_and_vars
                              if grad is not None]
                     if not grads:
                         # No tower produced a gradient for this variable;
                         # tf.concat on an empty list would raise, and there
                         # is nothing to apply anyway.
                         continue
                     mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                     self.mean_grads_and_vars.append((mean_grad, var))

             # Training Scheme
             self.global_step = tf.Variable(0, name='global_step', trainable=False)
             self.train_op = self.optimizer.apply_gradients(self.mean_grads_and_vars, self.global_step)

             # Summary
             tf.summary.scalar('mean_loss', self.mean_loss)
             self.merged = tf.summary.merge_all()

         else: # Evaluation
             self.x = tf.placeholder(tf.int32, shape=(None, None))
             self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))
             self.decoder_inputs = shift_by_one(self.y)
             with tf.variable_scope('net'):
                 # Encoder
                 self.memory = encode(self.x, is_training=is_training) # (N, T, E)

                 # Decoder
                 self.outputs1 = decode1(self.decoder_inputs, self.memory, is_training=is_training) # (N, T', hp.n_mels*hp.r)
                 self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)
コード例 #26
0
    def add_data(self, reuse=None):
        '''
        Add either variables (for training) or placeholders (for synthesis) to the graph

        In 'train' mode the inputs come from ``get_batch``; in 'synthesize'
        and 'generate_attention' modes they are placeholders. Any other mode
        adds nothing.

        Args:
          reuse: Unused here; kept so existing callers keep working.
        '''
        # Data Feeding
        ## L: Text. (B, N), int32
        ## mels: Reduced melspectrogram. (B, T/r, n_mels) float32
        ## mags: Magnitude. (B, T, n_fft//2+1) float32
        hp = self.hp

        # Modes are compared with `==`: `is` tests object identity, which is
        # not guaranteed to hold for equal strings.
        if self.mode == 'train':
            batchdict = get_batch(hp, self.get_batchsize())

            self.L = batchdict['text']
            self.mels = batchdict['mel']
            self.mags = batchdict['mag']
            self.fnames = batchdict['fname']
            self.num_batch = batchdict['num_batch']

            if hp.multispeaker:
                ## check multispeaker config is valid:- TODO: to config validation?
                for position in hp.multispeaker:
                    assert position in ['text_encoder_input', 'text_encoder_towards_end', \
                                'audio_decoder_input', 'ssrn_input', 'audio_encoder_input',\
                                'learn_channel_contributions', 'speaker_dependent_phones']
                self.speakers = batchdict['speaker']
            else:
                self.speakers = None
            if hp.attention_guide_dir:
                self.gts = batchdict['attention_guide']
            else:
                self.gts = tf.convert_to_tensor(get_global_attention_guide(hp))
            if hp.use_external_durations:
                self.durations = batchdict['duration']
            if hp.merlin_label_dir:
                self.merlin_label = batchdict['merlin_label']
            if 'position_in_phone' in hp.history_type:
                self.position_in_phone = batchdict['position_in_phone']
            batchsize = self.get_batchsize()
            self.prev_max_attentions = tf.ones(shape=(batchsize, ),
                                               dtype=tf.int32)

        elif self.mode in ('synthesize', 'generate_attention'):
            # Both modes feed the same placeholders; only synthesis also
            # feeds the previous attention maxima (added at the end).
            self.L = tf.placeholder(tf.int32, shape=(None, None))
            self.speakers = None
            if hp.multispeaker:
                self.speakers = tf.placeholder(tf.int32, shape=(None, None))
            if hp.use_external_durations:
                self.durations = tf.placeholder(tf.float32,
                                                shape=(None, None, None))
            if hp.merlin_label_dir:
                self.merlin_label = tf.placeholder(tf.float32,
                                                   shape=(None, None,
                                                          hp.merlin_lab_dim))
            if 'position_in_phone' in hp.history_type:
                self.position_in_phone = tf.placeholder(tf.float32,
                                                        shape=(None, None, 1))
            self.mels = tf.placeholder(tf.float32,
                                       shape=(None, None, hp.n_mels))
            if self.mode == 'synthesize':
                self.prev_max_attentions = tf.placeholder(tf.int32,
                                                          shape=(None, ))
コード例 #27
0
ファイル: train1.py プロジェクト: QianQQ/Voice-Conversion
def train(logdir='logdir/default/train1', queue=True):
    """Train net1, writing summaries and checkpoints under ``logdir``.

    Args:
        logdir: Directory used for TensorBoard summaries and checkpoints; it
            is also handed to ``model.load`` and ``eval1.eval``.
        queue: If True, ``train_op`` is run without a ``feed_dict``. If False,
            every step fetches a batch with ``get_batch`` and feeds it through
            ``model.x_mfcc`` / ``model.y_ppgs``.
    """
    model = Model(mode="train1", batch_size=hp.Train1.batch_size, queue=queue)

    # Loss
    loss_op = model.loss_net1()

    # Accuracy
    acc_op = model.acc_net1()

    # Training Scheme
    global_step = tf.Variable(0, name='global_step', trainable=False)

    optimizer = tf.train.AdamOptimizer(learning_rate=hp.Train1.lr)
    # Run the ops registered in UPDATE_OPS before every optimizer step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net1')
        train_op = optimizer.minimize(loss_op, global_step=global_step, var_list=var_list)

    # Summary
    for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'net/net1'):
        tf.summary.histogram(v.name, v)
    tf.summary.scalar('net1/train/loss', loss_op)
    tf.summary.scalar('net1/train/acc', acc_op)
    summ_op = tf.summary.merge_all()

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
        ),
    )
    # Training
    with tf.Session(config=session_conf) as sess:
        # Load trained model
        sess.run(tf.global_variables_initializer())
        model.load(sess, 'train1', logdir=logdir)

        writer = tf.summary.FileWriter(logdir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            for epoch in range(1, hp.Train1.num_epochs + 1):
                # Last feed used in this epoch. It stays None in queue mode,
                # where no batch is fetched in Python, so the summary run
                # below no longer references unbound batch variables.
                feed_dict = None
                for step in tqdm(range(model.num_batch), total=model.num_batch, ncols=70, leave=False, unit='b'):
                    if not queue:
                        mfcc, ppg = get_batch(model.mode, model.batch_size)
                        feed_dict = {model.x_mfcc: mfcc, model.y_ppgs: ppg}
                    sess.run(train_op, feed_dict=feed_dict)

                # Evaluate the summaries and the step counter once per epoch.
                # In placeholder mode the last batch is fed again because, in
                # some environments, these ops require the placeholders to be fed.
                summ, gs = sess.run([summ_op, global_step], feed_dict=feed_dict)

                # Write checkpoint files every `save_per_epoch` epochs
                if epoch % hp.Train1.save_per_epoch == 0:
                    tf.train.Saver().save(sess, '{}/epoch_{}_step_{}'.format(logdir, epoch, gs))

                # Write eval accuracy at every epoch, in a fresh graph so the
                # evaluation ops do not land in the training graph.
                with tf.Graph().as_default():
                    eval1.eval(logdir=logdir, queue=False)

                writer.add_summary(summ, global_step=gs)
        finally:
            # Flush the summaries and stop the queue-runner threads even when
            # training is interrupted by an exception.
            writer.close()
            coord.request_stop()
            coord.join(threads)
コード例 #28
0
ファイル: train.py プロジェクト: bo1yuan/Somiao-Pinyin
    def __init__(self, is_training=True):
        """Build the sequence-labelling graph inside a private ``tf.Graph``.

        The input ids ``x`` (vocabulary size ``len(pnyn2idx)``) are embedded,
        passed through a pre-net, a CBHG-style encoder and a bidirectional
        GRU, and projected to logits over ``len(hanzi2idx)`` classes.

        Args:
            is_training: When True, ``x``/``y`` come from ``get_batch()`` and
                the masked loss, accuracy, Adam train op and summaries are
                added. When False, ``x`` and ``y`` are ``int32`` placeholders
                of shape ``(None, hp.maxlen)``.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Inputs
            if not is_training:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))
                self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))
            else:
                self.x, self.y, self.num_batch = get_batch()

            # Vocabulary: only the two forward lookup tables are needed here.
            pnyn2idx, _, hanzi2idx, _ = load_vocab()

            half_units = hp.embed_size // 2

            # Character embedding for x
            embedded = embed(self.x,
                             len(pnyn2idx),
                             hp.embed_size,
                             scope="emb_x")

            # Encoder pre-net -> (N, T, E/2)
            prenet_out = prenet(embedded,
                                num_units=[hp.embed_size, half_units],
                                is_training=is_training)

            # Encoder CBHG
            # Conv1D bank -> (N, T, K * E / 2)
            banks = conv1d_banks(prenet_out,
                                 K=hp.encoder_num_banks,
                                 num_units=half_units,
                                 is_training=is_training)

            # Max pooling -> (N, T, K * E / 2)
            pooled = tf.layers.max_pooling1d(banks, 2, 1, padding="same")

            # Conv1D projections, each -> (N, T, E/2)
            projected = conv1d(pooled, half_units, 5, scope="conv1d_1")
            projected = normalize(projected,
                                  type=hp.norm_type,
                                  is_training=is_training,
                                  activation_fn=tf.nn.relu,
                                  scope="norm1")
            projected = conv1d(projected, half_units, 5, scope="conv1d_2")
            projected = normalize(projected,
                                  type=hp.norm_type,
                                  is_training=is_training,
                                  activation_fn=None,
                                  scope="norm2")

            # Residual connection around the projections -> (N, T, E/2)
            hidden = projected + prenet_out

            # Highway nets, each -> (N, T, E/2)
            for block_idx in range(hp.num_highwaynet_blocks):
                hidden = highwaynet(hidden,
                                    num_units=half_units,
                                    scope='highwaynet_{}'.format(block_idx))

            # Bidirectional GRU -> (N, T, E)
            hidden = gru(hidden, half_units, True, scope="gru1")

            # Readout
            self.outputs = tf.layers.dense(hidden,
                                           len(hanzi2idx),
                                           use_bias=False)
            predicted_ids = tf.arg_max(self.outputs, dimension=-1)
            self.preds = tf.to_int32(predicted_ids)

            if is_training:
                self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y, logits=self.outputs)

                # Mask: 1.0 wherever the label is non-zero, 0.0 elsewhere.
                is_nonzero = tf.not_equal(self.y, tf.zeros_like(self.y))
                self.istarget = tf.to_float(is_nonzero)

                correct = tf.to_float(tf.equal(self.preds, self.y))
                self.hits = correct * self.istarget
                self.acc = tf.reduce_sum(self.hits) / tf.reduce_sum(
                    self.istarget)

                masked_loss = self.loss * self.istarget
                self.mean_loss = tf.reduce_sum(masked_loss) / tf.reduce_sum(
                    self.istarget)

                # Training scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                tf.summary.scalar('acc', self.acc)
                self.merged = tf.summary.merge_all()
コード例 #29
0
ファイル: train.py プロジェクト: WeCognize/deepvoice3
    def __init__(self, training=True):
        """Build the encoder/decoder/converter graph inside a private ``tf.Graph``.

        Args:
            training: When True, inputs come from ``get_batch()`` and the
                losses, the clipped-gradient train op and the summaries are
                added. When False, ``x``, ``y1`` and
                ``prev_max_attentions_li`` are fixed-shape placeholders that
                the caller has to feed.
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, Tx), int32
            ## y1: Reduced melspectrogram. (N, Ty//r, n_mels*r) float32
            ## y2: Reduced dones. (N, Ty//r,) int32
            ## z: Magnitude. (N, Ty, n_fft//2+1) float32
            if training:
                self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size), dtype=tf.int32)
            else: # Inference
                self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
                self.y1 = tf.placeholder(tf.float32, shape=(hp.batch_size, hp.Ty//hp.r, hp.n_mels*hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32, shape=(hp.dec_layers, hp.batch_size,))

            # Get decoder inputs: feed last frames only (N, Ty//r, n_mels).
            # The targets are shifted right by one step with a zero frame in front.
            self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.keys, self.vals = encoder(self.x, training=training) # (N, Tx, e)

            with tf.variable_scope("decoder"):
                # mel_logits: (N, Ty/r, n_mels*r)
                # done_output: (N, Ty/r, 2),
                # decoder_output: (N, Ty/r, e)
                # alignments_li: dec_layers*(Tx, Ty/r)
                # max_attentions_li: dec_layers*(N, T_y/r)
                self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li \
                    = decoder(self.decoder_input,
                             self.keys,
                             self.vals,
                             self.prev_max_attentions_li,
                             training=training)
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

            with tf.variable_scope("converter"):
                # Restore shape
                self.converter_input = tf.reshape(self.decoder_output, (-1, hp.Ty, hp.embed_size//hp.r))
                self.converter_input = fc_block(self.converter_input,
                                                hp.converter_channels,
                                                activation_fn=tf.nn.relu,
                                                training=training) # (N, Ty, v)

                # Converter
                self.mag_logits = converter(self.converter_input, training=training) # (N, Ty, 1+n_fft//2)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            if training:
                # Loss
                self.loss_mels = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
                self.loss_dones = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output, labels=self.y2))
                self.loss_mags = tf.reduce_mean(tf.abs(self.mag_output - self.z))
                self.loss = self.loss_mels + self.loss_dones + self.loss_mags

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    # compute_gradients pairs a variable with None when the
                    # loss does not depend on it. The clip ops reject None,
                    # so such pairs are passed through untouched and
                    # apply_gradients skips them.
                    if grad is not None:
                        grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                        grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))
                self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.scalar('Train_Loss/LOSS', self.loss)
                tf.summary.scalar('Train_Loss/mels', self.loss_mels)
                tf.summary.scalar('Train_Loss/dones', self.loss_dones)
                tf.summary.scalar('Train_Loss/mags', self.loss_mags)

                self.merged = tf.summary.merge_all()
コード例 #30
0
ファイル: train_multi_gpus.py プロジェクト: tzirakis/tacotron
    def __init__(self, is_training=True):
        """Build the model graph inside a private ``tf.Graph``.

        Args:
            is_training: When True, the batch from ``get_batch()`` is split
                into ``hp.num_gpus`` shards, one tower per GPU is built with
                shared variables, and the tower gradients are averaged before
                a single Adam update. When False, a single encoder/decoder
                is built on placeholders for ``x`` and ``decoder_inputs``.
        """
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
                self.decoder_inputs = shift_by_one(self.y)

                # Note that batch size was multiplied by # gpus.
                # Now we split the mini-batch data by # gpus.
                self.x = tf.split(self.x, hp.num_gpus, 0)
                self.y = tf.split(self.y, hp.num_gpus, 0)
                self.z = tf.split(self.z, hp.num_gpus, 0)
                self.decoder_inputs = tf.split(self.decoder_inputs,
                                               hp.num_gpus, 0)

                # optimizer
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

                self.losses, self.grads_and_vars_list = [], []
                for i in range(hp.num_gpus):
                    # reuse is False only for the first tower, so every later
                    # tower shares the variables created by tower 0.
                    with tf.variable_scope('net', reuse=bool(i)):
                        with tf.device('/gpu:{}'.format(i)):
                            with tf.name_scope('gpu_{}'.format(i)):
                                # Encoder
                                self.memory = encode(
                                    self.x[i],
                                    is_training=is_training)  # (N, T, E)

                                # Decoder
                                self.outputs1 = decode1(
                                    self.decoder_inputs[i],
                                    self.memory)  # (N, T', hp.n_mels*hp.r)
                                self.outputs2 = decode2(
                                    self.outputs1, is_training=is_training
                                )  # (N, T', (1+hp.n_fft//2)*hp.r)

                                # Loss
                                if hp.loss_type == "l1":  # L1 loss
                                    self.loss1 = tf.abs(self.outputs1 -
                                                        self.y[i])
                                    self.loss2 = tf.abs(self.outputs2 -
                                                        self.z[i])
                                else:  # L2 loss
                                    self.loss1 = tf.squared_difference(
                                        self.outputs1, self.y[i])
                                    self.loss2 = tf.squared_difference(
                                        self.outputs2, self.z[i])

                                # Target masking: zero the loss wherever the
                                # target value is exactly 0.
                                if hp.target_zeros_masking:
                                    self.loss1 *= tf.to_float(
                                        tf.not_equal(self.y[i], 0.))
                                    self.loss2 *= tf.to_float(
                                        tf.not_equal(self.z[i], 0.))

                                self.mean_loss1 = tf.reduce_mean(self.loss1)
                                self.mean_loss2 = tf.reduce_mean(self.loss2)
                                self.mean_loss = self.mean_loss1 + self.mean_loss2

                                self.losses.append(self.mean_loss)
                                self.grads_and_vars = self.optimizer.compute_gradients(
                                    self.mean_loss)
                                self.grads_and_vars_list.append(
                                    self.grads_and_vars)

                with tf.device('/cpu:0'):
                    # Aggregate losses, then calculate average loss.
                    self.loss = tf.add_n(self.losses) / len(self.losses)

                    # Aggregate gradients, then calculate average gradients.
                    # zip(*...) groups the i-th (grad, var) pair of every tower.
                    self.mean_grads_and_vars = []
                    for grads_and_vars in zip(*self.grads_and_vars_list):
                        # Taken explicitly from the last tower's pair instead
                        # of relying on a loop variable leaking out of the
                        # gradient loop.
                        var = grads_and_vars[-1][1]

                        # compute_gradients yields None for a variable the
                        # loss does not depend on; expand_dims rejects None,
                        # so those entries are left out of the average.
                        grads = [
                            tf.expand_dims(grad, 0)
                            for grad, _ in grads_and_vars
                            if grad is not None
                        ]
                        if not grads:
                            # No tower produced a gradient: keep the pair so
                            # apply_gradients can skip the variable itself.
                            self.mean_grads_and_vars.append((None, var))
                            continue

                        mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                        self.mean_grads_and_vars.append((mean_grad, var))

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.train_op = self.optimizer.apply_gradients(
                    self.mean_grads_and_vars, self.global_step)

                # Summary
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()

            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.decoder_inputs = tf.placeholder(tf.float32,
                                                     shape=(None, None,
                                                            hp.n_mels * hp.r))

                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(self.decoder_inputs,
                                        self.memory)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
コード例 #31
0
from hparams import Hparams
from model import Transformer
from utils import save_hparams, save_variable_specs, get_hypotheses

# Script setup: parse hyperparameters, build the train/eval datasets and
# create the shared input iterator.
logging.basicConfig(level=logging.INFO)

logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# Persist the parsed hyperparameters under the log directory.
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# get_batch returns three values here: the batches object plus two counts
# (named number of batches / number of samples by this script).
train_batches, num_train_batches, num_train_samples = get_batch(hp.train,
                                                                hp.maxlen,
                                                                hp.maxlen,
                                                                hp.vocab,
                                                                hp.batch_size,
                                                                shuffle=True)

# Same call for the evaluation data, without shuffling.
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval,
                                                             hp.maxlen,
                                                             hp.maxlen,
                                                             hp.vocab,
                                                             hp.batch_size,
                                                             shuffle=False)

# Create an iterator from the training batches' output types and shapes only.
# NOTE(review): the name `iter` shadows the Python builtin of the same name.
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()

logging.info('# init data')
コード例 #32
0
 def __init__(self, config=None):
     """Load the vocabulary and create ``origx`` inside a private ``tf.Graph``.

     Args:
         config: Passed through unchanged as the first argument of
             ``get_batch``.
     """
     self.char2idx, self.idx2char = load_vocab()
     self.graph = tf.Graph()
     with self.graph.as_default():
         # get_batch is expected to return exactly six values here; only the
         # first one is kept.
         self.origx, _, _, _, _, _ = get_batch(config, 'Encoder')
コード例 #33
0
import logging
from tqdm import tqdm

# Script setup: restore hyperparameters, build the test dataset and wire the
# model's evaluation outputs to it.
logging.basicConfig(level=logging.INFO)

logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# Called with the parsed arguments and the model directory.
# NOTE(review): presumably overrides `hp` with the values saved at training
# time - confirm in load_hparams.
load_hparams(hp, hp.modeldir)

logging.info("# Prepare test batches")
# NOTE(review): the two 100000 arguments are presumably maximum source/target
# lengths set high enough that no test sample is dropped - confirm against
# get_batch.
test_batches, num_test_batches, num_test_samples = get_batch(
    hp.test_source,
    hp.test_target,
    100000,
    100000,
    hp.vocab,
    hp.test_batch_size,
    shuffle=False)
# Iterator built from the test batches' output types and shapes.
# NOTE(review): the name `iter` shadows the Python builtin of the same name.
iter = tf.data.Iterator.from_structure(test_batches.output_types,
                                       test_batches.output_shapes)
xs, ys = iter.get_next()

# Op that points the iterator at the test batches.
test_init_op = iter.make_initializer(test_batches)

logging.info("# Load model")
m = Transformer(hp)
# Only the first and third values returned by m.eval are used by this script.
y_hat, _, refs = m.eval(xs, ys)

logging.info("# Session")
with tf.Session() as sess:
コード例 #34
0
    def __init__(self, config=None, training=True, train_form='Both'):
        """Build the graph for the selected sub-networks in a private ``tf.Graph``.

        Args:
            config: Passed through to ``get_batch`` when ``training`` is True.
            training: When True, inputs come from ``get_batch`` and the loss,
                the clipped-gradient train op and the summaries are added.
                When False, ``x``, ``y1`` and ``prev_max_attentions_li`` are
                batch-size-1 placeholders.
            train_form: ``'Both'`` builds encoder, decoder and converter;
                ``'Encoder'`` builds encoder and decoder only;
                ``'Converter'`` builds only the converter, fed with ``y1``.
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()
        self.graph = tf.Graph()
        with self.graph.as_default():
            # ---- Inputs ----
            if not training:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(1, hp.T_x))
                self.y1 = tf.placeholder(
                    tf.float32,
                    shape=(1, hp.T_y // hp.r, hp.n_mels * hp.r))
                self.prev_max_attentions_li = tf.placeholder(
                    tf.int32, shape=(hp.dec_layers, 1,))
            else:
                (self.origx, self.x, self.y1, self.y2, self.y3,
                 self.num_batch) = get_batch(config, train_form)
                self.prev_max_attentions_li = tf.ones(
                    shape=(hp.dec_layers, self.num_batch), dtype=tf.int32)

            # ---- Encoder / decoder (skipped when only the converter runs) ----
            if train_form != 'Converter':
                # Decoder inputs: last frames only, shifted right by one step
                # with a zero frame in front.
                zero_frame = tf.zeros_like(self.y1[:, :1, -hp.n_mels:])
                previous_frames = self.y1[:, :-1, -hp.n_mels:]
                self.decoder_input = tf.concat((zero_frame, previous_frames), 1)

                with tf.variable_scope("encoder"):
                    self.encoded = encoder(self.x, training=training)

                with tf.variable_scope("decoder"):
                    decoder_outputs = decoder(self.decoder_input,
                                              self.encoded,
                                              self.prev_max_attentions_li,
                                              training=training)
                    (self.mel_logits, self.done_output,
                     self.max_attentions_li) = decoder_outputs
                    self.mel_output = tf.nn.sigmoid(self.mel_logits)

            # ---- Converter ----
            # 'Both' feeds the predicted mels, 'Converter' feeds the targets.
            if train_form in ('Both', 'Converter'):
                with tf.variable_scope("converter"):
                    if train_form == 'Both':
                        self.converter_input = self.mel_output
                    else:
                        self.converter_input = self.y1
                    self.mag_logits = converter(self.converter_input,
                                                training=training)
                    self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            if training:
                # ---- Individual losses ----
                if train_form != 'Converter':
                    mel_error = tf.abs(self.mel_output - self.y1)
                    self.loss1 = tf.reduce_mean(mel_error)
                    if hp.include_dones:
                        done_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=self.done_output, labels=self.y2)
                        self.loss2 = tf.reduce_mean(done_xent)
                if train_form != 'Encoder':
                    mag_error = tf.abs(self.mag_output - self.y3)
                    self.loss3 = tf.reduce_mean(mag_error)

                # ---- Total loss for the selected training form ----
                if train_form == 'Both':
                    self.loss = (self.loss1 + self.loss2 + self.loss3
                                 if hp.include_dones else
                                 self.loss1 + self.loss3)
                elif train_form == 'Encoder':
                    self.loss = (self.loss1 + self.loss2
                                 if hp.include_dones else self.loss1)
                else:
                    self.loss = self.loss3

                # ---- Training scheme ----
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                # Gradient clipping: by value first, then by norm. Pairs
                # whose gradient is None are passed through unchanged.
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    if grad is not None:
                        grad = tf.clip_by_value(grad, -1. * hp.max_grad_val,
                                                hp.max_grad_val)
                        grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))

                self.train_op = self.optimizer.apply_gradients(
                    self.clipped, global_step=self.global_step)

                # ---- Summaries ----
                tf.summary.scalar('loss', self.loss)

                if train_form != 'Converter':
                    tf.summary.histogram('mel_output', self.mel_output)
                    tf.summary.histogram('mel_actual', self.y1)
                    tf.summary.scalar('loss1', self.loss1)
                    if hp.include_dones:
                        tf.summary.histogram('done_output', self.done_output)
                        tf.summary.histogram('done_actual', self.y2)
                        tf.summary.scalar('loss2', self.loss2)
                if train_form != 'Encoder':
                    tf.summary.histogram('mag_output', self.mag_output)
                    tf.summary.histogram('mag_actual', self.y3)
                    tf.summary.scalar('loss3', self.loss3)

                self.merged = tf.summary.merge_all()
コード例 #35
0
    def __init__(self, mode="train"):
        """Build the encoder/decoder graph for the given mode.

        Args:
            mode: ``"train"`` reads ``x``, ``y``, ``z`` and ``fnames`` from
                ``get_batch()``; ``"eval"`` uses placeholders for all four.
                Any other value is treated as synthesis: only ``x`` and ``y``
                placeholders are created and no loss, train op or summary is
                added.
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Set phase
        is_training = mode == "train"

        # Graph
        # Data Feeding
        # x: Text. (N, Tx)
        # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
        # z: Magnitude. (N, Ty, n_fft//2+1)
        if mode == "train":
            self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
        elif mode == "eval":
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
            self.z = tf.placeholder(tf.float32,
                                    shape=(None, None, 1 + hp.n_fft // 2))
            self.fnames = tf.placeholder(tf.string, shape=(None, ))
        else:  # Synthesize
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))

        # Get encoder/decoder inputs
        self.encoder_inputs = embed(self.x, len(hp.vocab),
                                    hp.embed_size)  # (N, T_x, E)
        # Shift the targets right by one step with a zero frame in front.
        self.decoder_inputs = tf.concat(
            (tf.zeros_like(self.y[:, :1, :]), self.y[:, :-1, :]),
            1)  # (N, Ty/r, n_mels*r)
        # Feed last frames only (N, Ty/r, n_mels)
        self.decoder_inputs = self.decoder_inputs[:, :, -hp.n_mels:]

        # Networks
        with tf.variable_scope("net"):
            # Encoder
            self.memory = encoder(self.encoder_inputs,
                                  is_training=is_training)  # (N, T_x, E)

            # Decoder1
            self.y_hat, self.alignments = decoder1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T_y//r, n_mels*r)
            # Decoder2 or postprocessing
            self.z_hat = decoder2(
                self.y_hat,
                is_training=is_training)  # (N, T_y//r, (1+n_fft//2)*r)

        # monitor: waveform reconstructed from the first item of the batch
        self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

        if mode in ("train", "eval"):
            # Loss
            self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y))
            self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z))
            self.loss = self.loss1 + self.loss2

            # Training Scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

            ## gradient clipping
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                # compute_gradients pairs a variable with None when the loss
                # does not depend on it. clip_by_norm rejects None, so such
                # pairs are passed through untouched and apply_gradients
                # skips them.
                if grad is not None:
                    grad = tf.clip_by_norm(grad, 5.)
                self.clipped.append((grad, var))
            self.train_op = self.optimizer.apply_gradients(
                self.clipped, global_step=self.global_step)

            # Summary
            tf.summary.scalar('{}/loss1'.format(mode), self.loss1)
            tf.summary.scalar('{}/loss'.format(mode), self.loss)
            tf.summary.scalar('{}/lr'.format(mode), self.lr)

            tf.summary.image("{}/mel_gt".format(mode),
                             tf.expand_dims(self.y, -1),
                             max_outputs=1)
            tf.summary.image("{}/mel_hat".format(mode),
                             tf.expand_dims(self.y_hat, -1),
                             max_outputs=1)
            tf.summary.image("{}/mag_gt".format(mode),
                             tf.expand_dims(self.z, -1),
                             max_outputs=1)
            tf.summary.image("{}/mag_hat".format(mode),
                             tf.expand_dims(self.z_hat, -1),
                             max_outputs=1)

            tf.summary.audio("{}/sample".format(mode),
                             tf.expand_dims(self.audio, 0), hp.sr)
            self.merged = tf.summary.merge_all()
コード例 #36
0
def train(logdir, hparams):
    """Train net1 with fed batches, writing summaries and checkpoints.

    Args:
        logdir: Directory used for TensorBoard summaries and checkpoints; it
            is also handed to ``model.load`` and ``eval1.eval``.
        hparams: Hyperparameter object; ``hparams.Train1`` supplies ``lr``,
            ``num_epochs`` and ``save_per_epoch``.
    """
    model = Model(mode="train1", hparams=hparams)

    # Loss and accuracy ops
    loss_op = model.loss_net1()
    acc_op = model.acc_net1()

    # Training scheme
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.Train1.lr)

    # Run the ops registered in UPDATE_OPS before every optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        net1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      'net/net1')
        train_op = optimizer.minimize(loss_op,
                                      global_step=global_step,
                                      var_list=net1_vars)

    # Summary
    tf.summary.scalar('net1/train/loss', loss_op)
    tf.summary.scalar('net1/train/acc', acc_op)
    summ_op = tf.summary.merge_all()

    # Cap this process at 90% of the GPU memory.
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9

    # Training
    with tf.Session(config=session_conf) as sess:
        # Initialize, then load the trained model
        sess.run(tf.global_variables_initializer())
        model.load(sess, 'train1', logdir=logdir)

        summary_writer = tf.summary.FileWriter(logdir, sess.graph)
        coordinator = tf.train.Coordinator()
        queue_threads = tf.train.start_queue_runners(coord=coordinator)

        last_epoch = hparams.Train1.num_epochs
        for epoch in range(1, last_epoch + 1):
            for _ in range(model.num_batch):
                mfcc, ppg = get_batch(model.mode, model.batch_size)
                batch_feed = {model.x_mfcc: mfcc, model.y_ppgs: ppg}
                sess.run(train_op, feed_dict=batch_feed)

            # Evaluate the summaries and the step counter once per epoch,
            # feeding the last training batch again.
            epoch_feed = {model.x_mfcc: mfcc, model.y_ppgs: ppg}
            summ, gs = sess.run([summ_op, global_step], feed_dict=epoch_feed)

            # Write checkpoint files every `save_per_epoch` epochs
            should_save = epoch % hparams.Train1.save_per_epoch == 0
            if should_save:
                checkpoint_path = '{}/epoch_{}_step_{}'.format(logdir, epoch, gs)
                tf.train.Saver().save(sess, checkpoint_path)

            # Write eval accuracy at every epoch, inside a fresh default graph
            with tf.Graph().as_default():
                eval1.eval(logdir=logdir, hparams=hparams)

            summary_writer.add_summary(summ, global_step=gs)

        summary_writer.close()
        coordinator.request_stop()
        coordinator.join(queue_threads)
コード例 #37
0
import logging

# Emit INFO-level progress messages for the rest of the script.
logging.basicConfig(level=logging.INFO)
# Expose only GPU index 5 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = "5"

# --- Hyperparameters -------------------------------------------------------
# Parse the command-line hyperparameters and persist them under hp.logdir.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)

# --- Datasets --------------------------------------------------------------
logging.info("# Prepare train/eval batches")
# Training data: shuffled, with the configured maximum lengths.
train_batches, num_train_batches, num_train_samples = get_batch(
    hp.train1, hp.train2,
    hp.maxlen1, hp.maxlen2,
    hp.vocab, hp.batch_size,
    shuffle=True)
# Evaluation data: unshuffled. The two 100000 arguments sit in the positions
# the training call uses for hp.maxlen1 / hp.maxlen2 -- presumably a limit
# large enough that nothing is cut; confirm against get_batch.
eval_batches, num_eval_batches, num_eval_samples = get_batch(
    hp.eval1, hp.eval2,
    100000, 100000,
    hp.vocab, hp.batch_size,
    shuffle=False)

# Build an iterator described only by the element types/shapes of the
# training batches, then take its next-element tensors as (xs, ys).
iter = tf.data.Iterator.from_structure(
    train_batches.output_types,
    train_batches.output_shapes)
xs, ys = iter.get_next()
コード例 #38
0
ファイル: train.py プロジェクト: penelopeia/tacotron
    def __init__(self, is_training=True):
        """Build the TensorFlow graph for the model.

        A new ``tf.Graph`` is created and stored on ``self.graph``; every op
        constructed here is added to that graph.

        Args:
            is_training: When true, ``x``, ``y``, ``z`` and ``num_batch`` are
                obtained from ``get_batch()`` and the losses, optimizer and
                summaries are built as well. When false, ``x`` and ``y`` are
                placeholders to be fed by the caller, ``z`` is never defined,
                and only the encoder/decoder outputs are built.
        """
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
            else: # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))

            # The decoder is fed a shifted copy of the target `y`.
            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x, is_training=is_training) # (N, T, E)

                # Decoder
                self.outputs1 = decode1(self.decoder_inputs,
                                         self.memory,
                                         is_training=is_training) # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                # Loss: element-wise, outputs1 against y and outputs2 against z.
                if hp.loss_type=="l1": # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else: # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)

                # Target masking: zero the loss wherever the target is exactly 0.
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

                # Note the means are taken over all elements, masked ones included.
                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Logging
                ## histograms: average over the last axis, then over axis 0
                self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
                self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1),0)

                self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
                self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1),0)

                ## images: first example only, last axis averaged down to size 1,
                ## then an extra axis inserted at position 1
                self.expected1_i = tf.expand_dims(tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
                self.got1_i = tf.expand_dims(tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

                self.expected2_i = tf.expand_dims(tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
                self.got2_i = tf.expand_dims(tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

                # Training Scheme
                self.global_step = tf.Variable(0, name='global_step', trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)

                # Summary names must not contain spaces; TensorFlow would
                # rewrite such a name and log a message about it.
                tf.summary.histogram('expected_values1', self.expected1_h)
                tf.summary.histogram('gotten_values1', self.got1_h)
                tf.summary.histogram('expected_values2', self.expected2_h)
                tf.summary.histogram('gotten_values2', self.got2_h)

                # Image summaries get the values scaled by 255.
                tf.summary.image("expected_values1", self.expected1_i*255)
                tf.summary.image("gotten_values1", self.got1_i*255)
                tf.summary.image("expected_values2", self.expected2_i*255)
                tf.summary.image("gotten_values2", self.got2_i*255)

                self.merged = tf.summary.merge_all()