def save_states(global_step,
                mel_outputs,
                linear_outputs,
                attn,
                y,
                input_lengths,
                checkpoint_dir=None):
    """Dump intermediate training artifacts for one batch element.

    Saves, under ``checkpoint_dir``: the attention alignment plot, the
    predicted linear spectrogram plot, the waveform reconstructed from that
    spectrogram, and the target linear spectrogram plot. ``mel_outputs`` is
    accepted for signature compatibility but not used here.
    """
    print("Save intermediate states at step {}".format(global_step))

    # Prefer the second batch element when one exists, otherwise the first.
    idx = min(1, len(input_lengths) - 1)
    input_length = input_lengths[idx]

    def _to_numpy(batch_tensor):
        # Pull the selected element back to host memory as a numpy array.
        return batch_tensor[idx].cpu().data.numpy()

    # Attention alignment plot.
    alignment_path = join(checkpoint_dir,
                          "step{}_alignment.png".format(global_step))
    save_alignment(alignment_path, _to_numpy(attn))

    # Predicted linear spectrogram plot.
    predicted_spec = _to_numpy(linear_outputs)
    predicted_path = join(checkpoint_dir,
                          "step{}_predicted_spectrogram.png".format(global_step))
    save_spectrogram(predicted_path, predicted_spec)

    # Waveform reconstructed from the predicted spectrogram.
    wav_path = join(checkpoint_dir, "step{}_predicted.wav".format(global_step))
    audio.save_wav(audio.inv_spectrogram(predicted_spec.T), wav_path)

    # Ground-truth (target) linear spectrogram plot.
    target_path = join(checkpoint_dir,
                       "step{}_target_spectrogram.png".format(global_step))
    save_spectrogram(target_path, _to_numpy(y))
Beispiel #2
0
def tts(model, text):
    """Convert text to speech waveform given a Tacotron model.

    Returns a ``(waveform, alignment, spectrogram)`` tuple, where the
    spectrogram is the denormalized linear-scale prediction.
    """
    if use_cuda:
        model = model.cuda()
    # TODO: Turning off dropout of decoder's prenet causes serious performance
    # regression, not sure why.
    # model.decoder.eval()
    model.encoder.eval()
    model.postnet.eval()

    seq = np.array(text_to_sequence(text, [hparams.cleaners]))
    seq = Variable(torch.from_numpy(seq)).unsqueeze(0)
    if use_cuda:
        seq = seq.cuda()

    # Greedy decoding
    mel_outputs, linear_outputs, alignments = model(seq)

    # Pull the first (only) batch element back to numpy.
    linear = linear_outputs[0].cpu().data.numpy()
    align = alignments[0].cpu().data.numpy()
    spec = audio._denormalize(linear)

    # Invert the predicted linear spectrogram back to an audio signal.
    wav = audio.inv_spectrogram(linear.T)

    return wav, align, spec
Beispiel #3
0
 def synthesize(self, text, save_path=None):
     """Synthesize speech for *text*.

     When *save_path* is given, the wav is written there and the path is
     returned; otherwise the wav file contents are returned as bytes.
     """
     seq = textinput.to_sequence(
         text,
         force_lowercase=hparams.force_lowercase,
         expand_abbreviations=hparams.expand_abbreviations)
     spec = self.session.run(
         self.model.linear_outputs[0],
         feed_dict={
             self.model.inputs: [np.asarray(seq, dtype=np.int32)],
             self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
         })
     if save_path is None:
         # No destination: return the encoded wav bytes.
         buf = io.BytesIO()
         audio.save_wav(audio.inv_spectrogram(spec.T), buf)
         return buf.getvalue()
     audio.save_wav(audio.inv_spectrogram(spec.T), save_path)
     return save_path
Beispiel #4
0
    def synthesize(self, text):
        """Synthesize *text* in inference mode and return wav bytes."""
        # Disable training-only behavior (e.g. dropout) for inference.
        with chainer.using_config('train', False):
            seq = textinput.to_sequence(
                text,
                force_lowercase=hparams.force_lowercase,
                expand_abbreviations=hparams.expand_abbreviations)

            buf = io.BytesIO()
            audio.save_wav(audio.inv_spectrogram(self.model.output(seq).T),
                           buf)
            return buf.getvalue()
 def synthesize(self, text):
     """Synthesize *text* and return the wav file contents as bytes."""
     cleaners = [name.strip() for name in hparams.cleaners.split(',')]
     seq = text_to_sequence(text, cleaners)
     spec = self.session.run(
         self.model.linear_outputs[0],
         feed_dict={
             self.model.inputs: [np.asarray(seq, dtype=np.int32)],
             self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
         })
     buf = io.BytesIO()
     audio.save_wav(audio.inv_spectrogram(spec.T), buf)
     return buf.getvalue()
Beispiel #6
0
 def save_audio():
     """Save the most recently processed spectrogram as audio, plus its
     alignment plot, into ``log_dir`` (file names include the current
     trainer iteration)."""
     # model instance has spectrogram data which was processed last
     spectrogram = model.spectrogram  #TODO: change this specification
     waveform = audio.inv_spectrogram(spectrogram.T)
     audio.save_wav(
         waveform,
         os.path.join(
             log_dir,
             'iteration_{.updater.iteration}-audio.wav'.format(trainer)))
     plot.plot_alignment(
         alignment,
         os.path.join(
             log_dir,
             'iteration_{.updater.iteration}-align.png'.format(trainer)),
         # Bug fix: the original mixed %-style placeholders ('%s', '%.5f')
         # with str.format(), so the % markers were never expanded and
         # '{.updater.iteration}' looked up .updater on args.model (the
         # first positional argument) instead of on trainer.
         info='{}, {}, {}, iteration_{.updater.iteration}, loss={:.5f}'.format(
             args.model, commit, time_string(), trainer, loss))
     log('Input: %s' % textinput.to_string(input_seq))
Beispiel #7
0
    def synthesize(self, text):
        """Synthesize *text*, trim trailing silence, and return wav bytes."""
        cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
        seq = text_to_sequence(text, cleaner_names)
        feed = {
            self.model.inputs: [np.asarray(seq, dtype=np.int32)],
            self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
        }

        linear = self.session.run(self.linear_output, feed_dict=feed)
        # Cut the spectrogram where it goes quiet, then invert to audio and
        # trim any residual silence from the waveform as well.
        endpoint = audio.find_endpoint_spectrogram(linear, threshold_db=-10)
        linear = linear[:endpoint]
        wav = audio.inv_spectrogram(linear.T)
        wav = wav[:audio.find_endpoint(wav)]

        buf = io.BytesIO()
        audio.save_wav(wav, buf)
        return buf.getvalue()
Beispiel #8
0
 def synthesize(self, text1, text2):
     """Synthesize from a pair of input texts.

     Returns ``(wav_bytes, alignments1, alignments2)``. Abbreviation
     expansion is applied only to *text1*.
     """
     seq1 = textinput_fr.to_sequence(
         text1,
         force_lowercase=hparams.force_lowercase,
         expand_abbreviations=hparams.expand_abbreviations)
     seq2 = textinput_fr.to_sequence(
         text2,
         force_lowercase=hparams.force_lowercase,
         expand_abbreviations=False)

     feed = {
         self.model.inputs1: [np.asarray(seq1, dtype=np.int32)],
         self.model.input_lengths1: np.asarray([len(seq1)], dtype=np.int32),
         self.model.inputs2: [np.asarray(seq2, dtype=np.int32)],
         self.model.input_lengths2: np.asarray([len(seq2)], dtype=np.int32),
     }
     fetches = [
         self.model.linear_outputs[0],
         self.model.alignments1[0],
         self.model.alignments2[0],
     ]
     spec, alignments1, alignments2 = self.session.run(fetches,
                                                       feed_dict=feed)

     buf = io.BytesIO()
     audio.save_wav(audio.inv_spectrogram(spec.T), buf)
     return buf.getvalue(), alignments1, alignments2
Beispiel #9
0
def train(log_dir, args):
    """Build a (possibly multi-GPU) Tacotron training graph, feed it from
    tfrecord or npy data, and run the training loop.

    Every ``args.checkpoint_interval`` steps the model is checkpointed and a
    sample audio/alignment/text dump is written under ``log_dir/<step>/``.
    The loop also auto-recovers from loss spikes by restoring the previous
    checkpoint.

    Raises:
        ValueError: If ``args.data_type`` is neither 'tfrecord' nor 'npy'.
    """
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    log('Checkpoint path: %s' % checkpoint_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    sequence_to_text = sequence_to_text2

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Multi-GPU settings
        GPUs_id = eval(args.GPUs_id)
        num_GPU = len(GPUs_id)
        hparams.num_GPU = num_GPU
        models = []

        # Set up DataFeeder:
        coord = tf.train.Coordinator()

        if args.data_type == 'tfrecord':
            with open('./train_data_dict.json', 'r') as f:
                train_data_dict = json.load(f)
            train_data = args.train_data.split(',')
            file_list = []
            # Data file names embed the speaker-id count as "..._id_num_<N>...".
            pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
            id_num = 0
            for item in train_data:
                file_list.append(train_data_dict[item])
                id_num += int(re.findall(pattern, train_data_dict[item])[0])
            log('train data:%s' % args.train_data)

            feeder = DataFeeder_tfrecord(hparams, file_list)
            inputs, input_lengths, linear_targets, mel_targets, n_frames, wavs, identities = feeder._get_batch_input(
            )

        elif args.data_type == 'npy':
            with open('./train_npy_data_dict.json', 'r') as f:
                train_data_dict = json.load(f)
            train_data = args.train_data.split(',')
            file_list = []
            pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
            id_num = 0
            for item in train_data:
                file_list.append(train_data_dict[item])
                id_num += int(re.findall(pattern, train_data_dict[item])[0])
            log('train data:%s' % args.train_data)

            feeder = DataFeeder_npy(hparams, file_list, coord)
            inputs = feeder.inputs
            input_lengths = feeder.input_lengths
            mel_targets = feeder.mel_targets
            linear_targets = feeder.linear_targets
            wavs = feeder.wavs
            identities = feeder.identities

        else:
            # Bug fix: the original did `raise ('...')`, which raises a plain
            # string and is itself a TypeError in Python 3.
            raise ValueError('unsupported input data type: %s' %
                             args.data_type)

        # Set up model:
        global_step = tf.Variable(0, name='global_step', trainable=False)
        with tf.variable_scope('model') as scope:
            for i, GPU_id in enumerate(GPUs_id):
                with tf.device('/gpu:%d' % GPU_id):
                    with tf.name_scope('GPU_%d' % GPU_id):
                        models.append(None)
                        models[i] = create_model(args.model, hparams)
                        models[i].initialize(inputs=inputs,
                                             input_lengths=input_lengths,
                                             mel_targets=mel_targets,
                                             linear_targets=linear_targets,
                                             identities=identities,
                                             id_num=id_num)
                        models[i].add_loss()
                        models[i].add_optimizer(global_step)
                        # NOTE(review): overwritten on every iteration, so
                        # only the last GPU's stats are summarized.
                        stats = add_stats(models[i])

        # Bookkeeping:
        step = 0
        time_window = ValueWindow(250)
        loss_window = ValueWindow(1000)
        saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=8)
        # Train!
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            try:
                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                sess.run(tf.global_variables_initializer())
                if args.restore_step:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path,
                                              args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s' % restore_path)
                else:
                    log('Starting new training run')

                if args.data_type == 'tfrecord':
                    tf.train.start_queue_runners(sess=sess, coord=coord)
                    feeder.start_threads(sess=sess, coord=coord)
                elif args.data_type == 'npy':
                    feeder.start_in_session(sess)

                while not coord.should_stop():
                    start_time = time.time()

                    step, loss, opt, loss_regularity = sess.run([
                        global_step,
                        models[0].loss,
                        models[0].optimize,
                        models[0].loss_regularity,
                    ])

                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f avg_sec/step,  loss=%.05f,  avg_loss=%.05f,  lossw=%.05f]' % (
                        step, time_window.average, loss, loss_window.average,
                        loss_regularity)
                    log(message)

                    # if the gradient seems to explode, then restore to the previous step
                    if loss > 2 * loss_window.average or math.isnan(loss):
                        log('recover to the previous checkpoint')
                        restore_step = int(
                            (step - 10) / args.checkpoint_interval
                        ) * args.checkpoint_interval
                        restore_path = '%s-%d' % (checkpoint_path,
                                                  restore_step)
                        saver.restore(sess, restore_path)
                        continue

                    if step % args.summary_interval == 0:
                        log('Writing summary at step: %d' % step)
                        summary_writer.add_summary(sess.run(stats), step)

                    if step % args.checkpoint_interval == 0:
                        crrt_dir = os.path.join(log_dir, str(step))
                        os.makedirs(crrt_dir, exist_ok=True)

                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        input_seq, spectrogram, alignment, wav_original, melspectogram, spec_original, mel_original, \
                        identity2 = sess.run([models[0].inputs[0], models[0].linear_outputs[0], models[0].alignments[0],
                                              wavs[0],models[0].mel_outputs[0], linear_targets[0], mel_targets[0],
                                              identities[0]])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(crrt_dir, 'step-%d-audio.wav' % step))
                        audio.save_wav(
                            wav_original,
                            os.path.join(
                                crrt_dir, 'step-%d-audio-original-%d.wav' %
                                (step, identity2)))
                        np.save(os.path.join(crrt_dir, 'spec.npy'),
                                spectrogram,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'melspectogram.npy'),
                                melspectogram,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'spec_original.npy'),
                                spec_original,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'mel_original.npy'),
                                mel_original,
                                allow_pickle=False)
                        plot.plot_alignment(
                            alignment,
                            os.path.join(crrt_dir, 'step-%d-align.png' % step),
                            info='%s, %s, step=%d, loss=%.5f' %
                            (args.model, time_string(), step, loss))

                        # Extract a hard alignment and inspect how well the
                        # attention lines up with the input sequence.
                        # Transition scores favor staying on the same input
                        # token or advancing by exactly one.
                        transition_params = []
                        for i in range(alignment.shape[0]):
                            transition_params.append([])
                            for j in range(alignment.shape[0]):
                                if i == j or j - i == 1:
                                    transition_params[-1].append(500)
                                else:
                                    transition_params[-1].append(0.0)
                        alignment[0][0] = 100000
                        alignment2 = np.argmax(alignment, axis=0)
                        alignment3 = tf.contrib.crf.viterbi_decode(
                            alignment.T, transition_params)
                        alignment4 = np.zeros(alignment.shape)
                        for i, item in enumerate(alignment3[0]):
                            alignment4[item, i] = 1
                        plot.plot_alignment(
                            alignment4,
                            os.path.join(crrt_dir,
                                         'step-%d-align2.png' % step),
                            info='%s, %s, step=%d, loss=%.5f' %
                            (args.model, time_string(), step, loss))

                        # Cut the waveform into per-token chunks based on the
                        # Viterbi alignment and save each chunk.
                        crrt = 0
                        sample_crrt = 0
                        sample_last = 0
                        for i, item in enumerate(alignment3[0]):
                            if item == crrt:
                                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step\
                                               / 1000
                            if not item == crrt:
                                crrt += 1
                                sample_crrt = int(sample_crrt)
                                sample_last = int(sample_last)
                                wav_crrt = waveform[:sample_crrt]
                                wav_crrt2 = waveform[sample_last:sample_crrt]
                                audio.save_wav(
                                    wav_crrt,
                                    os.path.join(crrt_dir, '%d.wav' % crrt))
                                audio.save_wav(
                                    wav_crrt2,
                                    os.path.join(crrt_dir, '%d-2.wav' % crrt))
                                sample_last = sample_crrt
                                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step \
                                               / 1000

                        input_seq2 = []
                        input_seq3 = []
                        for item in alignment2:
                            input_seq2.append(input_seq[item])
                        for item in alignment3[0]:
                            input_seq3.append(input_seq[item])

                        # output alignment
                        path_align1 = os.path.join(crrt_dir,
                                                   'step-%d-align1.txt' % step)
                        path_align2 = os.path.join(crrt_dir,
                                                   'step-%d-align2.txt' % step)
                        path_align3 = os.path.join(crrt_dir,
                                                   'step-%d-align3.txt' % step)
                        path_seq1 = os.path.join(crrt_dir,
                                                 'step-%d-input1.txt' % step)
                        path_seq2 = os.path.join(crrt_dir,
                                                 'step-%d-input2.txt' % step)
                        path_seq3 = os.path.join(crrt_dir,
                                                 'step-%d-input3.txt' % step)
                        with open(path_align1, 'w') as f:
                            for row in alignment:
                                for item in row:
                                    f.write('%.3f' % item)
                                    f.write('\t')
                                f.write('\n')
                        with open(path_align2, 'w') as f:
                            for item in alignment2:
                                f.write('%.3f' % item)
                                f.write('\t')
                        with open(path_align3, 'w') as f:
                            for item in alignment3[0]:
                                f.write('%.3f' % item)
                                f.write('\t')
                        with open(path_seq1, 'w') as f:
                            f.write(sequence_to_text(input_seq))
                        with open(path_seq2, 'w') as f:
                            f.write(sequence_to_text(input_seq2))
                        with open(path_seq3, 'w') as f:
                            f.write(sequence_to_text(input_seq3))
                        log('Input: %s' % sequence_to_text(input_seq))
                        log('Input: %s' % str(input_seq))

            except Exception as e:
                log('Exiting due to exception: %s' % e)
                traceback.print_exc()
                coord.request_stop(e)
Beispiel #10
0
def train(log_dir, input_path, checkpoint_path, is_restore):
    """Train a single Tacotron model from preprocessed data.

    Args:
        log_dir: Directory for TensorBoard summaries, sample audio and
            alignment plots.
        input_path: Path to the training data consumed by ``DataFeeder``.
        checkpoint_path: Checkpoint file to save to (and restore from when
            ``is_restore`` is true).
        is_restore: When true, restore the session from ``checkpoint_path``
            before training.

    The loop runs until the coordinator stops; a loss above 100 or NaN is
    treated as an explosion and aborts training via the coordinator.
    """
    # Log the info
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model('tacotron', hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    step = 0
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if is_restore:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s' % (checkpoint_path)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint')
            else:
                log('Starting new training')

            # Start the feeder threads only after variables are initialized
            # (and possibly restored).
            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_interval = time.time() - start_time

                message = 'Step %d, %.03f sec, loss=%.05f' % (step, loss,
                                                              time_interval)
                log(message)

                # Abort on loss explosion; the exception below stops the
                # coordinator in the handler.
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % hparams.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % hparams.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # Fetch one sample: input token ids, predicted linear
                    # spectrogram, and the attention alignment.
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    # Invert the predicted spectrogram back to a waveform.
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, step=%d, loss=%.5f' %
                        ('tacotron', time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            coord.request_stop(e)
Beispiel #11
0
"""Convert a saved linear spectrogram (.npy) back into a wav file.

Usage: python this_script.py [spectrogram.npy] [output.wav]
Both arguments are optional and default to the original hard-coded paths.
"""
import sys

import numpy as np
from util import audio

# Generalized: paths can now be passed on the command line; the original
# hard-coded locations remain the defaults for backward compatibility.
spec_path = (sys.argv[1] if len(sys.argv) > 1 else
             '/home/toan/tacotron/training/ljspeech-spec-00844.npy')
out_path = (sys.argv[2] if len(sys.argv) > 2 else
            '/home/toan/tacotron/test.wav')

spectrogram = np.load(spec_path)
wav = audio.inv_spectrogram(spectrogram.T)
audio.save_wav(wav, out_path)
Beispiel #12
0
def train(log_dir, args):
    """Multi-GPU Tacotron training with averaged tower gradients and
    optional L2 weight decay.

    Builds one model replica per GPU in ``args.GPUs_id``, averages their
    gradients, and applies them with a shared Adam optimizer. Checkpoints,
    summaries, sample audio and alignment plots are written periodically to
    ``log_dir``; on a loss spike the previous checkpoint is restored.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # new attributes of hparams
        # hparams.num_GPU = len(GPUs_id)
        # hparams.datasets = eval(args.datasets)
        hparams.datasets = eval(args.datasets)
        hparams.prenet_layer1 = args.prenet_layer1
        hparams.prenet_layer2 = args.prenet_layer2
        hparams.gru_size = args.gru_size
        hparams.attention_size = args.attention_size
        hparams.rnn_size = args.rnn_size
        hparams.enable_fv1 = args.enable_fv1
        hparams.enable_fv2 = args.enable_fv2

        if args.batch_size:
            hparams.batch_size = args.batch_size

        # Multi-GPU settings
        GPUs_id = eval(args.GPUs_id)
        hparams.num_GPU = len(GPUs_id)
        tower_grads = []
        tower_loss = []
        models = []

        global_step = tf.Variable(-1, name='global_step', trainable=False)
        if hparams.decay_learning_rate:
            learning_rate = _learning_rate_decay(hparams.initial_learning_rate,
                                                 global_step, hparams.num_GPU)
        else:
            learning_rate = tf.convert_to_tensor(hparams.initial_learning_rate)
        # Set up DataFeeder:
        coord = tf.train.Coordinator()
        with tf.variable_scope('datafeeder') as scope:
            input_path = os.path.join(args.base_dir, args.input)
            feeder = DataFeeder(coord, input_path, hparams)
            # Split each feeder batch evenly across the GPUs.
            inputs = feeder.inputs
            inputs = tf.split(inputs, hparams.num_GPU, 0)
            input_lengths = feeder.input_lengths
            input_lengths = tf.split(input_lengths, hparams.num_GPU, 0)
            mel_targets = feeder.mel_targets
            mel_targets = tf.split(mel_targets, hparams.num_GPU, 0)
            linear_targets = feeder.linear_targets
            linear_targets = tf.split(linear_targets, hparams.num_GPU, 0)

        # Set up model:
        with tf.variable_scope('model') as scope:
            optimizer = tf.train.AdamOptimizer(learning_rate,
                                               hparams.adam_beta1,
                                               hparams.adam_beta2)
            for i, GPU_id in enumerate(GPUs_id):
                with tf.device('/gpu:%d' % GPU_id):
                    with tf.name_scope('GPU_%d' % GPU_id):

                        # Optional voice-print feature extracted from the
                        # mel targets by a ResCNN.
                        if hparams.enable_fv1 or hparams.enable_fv2:
                            net = ResCNN(data=mel_targets[i],
                                         batch_size=hparams.batch_size,
                                         hyparam=hparams)
                            net.inference()

                            voice_print_feature = tf.reduce_mean(
                                net.features, 0)
                        else:
                            voice_print_feature = None

                        models.append(None)
                        models[i] = create_model(args.model, hparams)
                        models[i].initialize(
                            inputs=inputs[i],
                            input_lengths=input_lengths[i],
                            mel_targets=mel_targets[i],
                            linear_targets=linear_targets[i],
                            voice_print_feature=voice_print_feature)
                        models[i].add_loss()
                        """L2 weight decay loss."""
                        if args.weight_decay > 0:
                            costs = []
                            for var in tf.trainable_variables():
                                #if var.op.name.find(r'DW') > 0:
                                costs.append(tf.nn.l2_loss(var))
                                # tf.summary.histogram(var.op.name, var)
                            weight_decay = tf.cast(args.weight_decay,
                                                   tf.float32)
                            cost = models[i].loss
                            models[i].loss += tf.multiply(
                                weight_decay, tf.add_n(costs))
                            cost_pure_wd = tf.multiply(weight_decay,
                                                       tf.add_n(costs))
                        else:
                            cost = models[i].loss
                            cost_pure_wd = tf.constant([0])

                        tower_loss.append(models[i].loss)

                        # Share variables across towers.
                        tf.get_variable_scope().reuse_variables()
                        models[i].add_optimizer(global_step, optimizer)

                        tower_grads.append(models[i].gradients)

            # calculate average gradient
            gradients = average_gradients(tower_grads)

            stats = add_stats(models[0], gradients, learning_rate)
            time.sleep(10)

        # apply average gradient

        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            apply_gradient_op = optimizer.apply_gradients(
                gradients, global_step=global_step)

        # Bookkeeping:
        step = 0
        time_window = ValueWindow(100)
        loss_window = ValueWindow(100)
        saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

        # Train!
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            try:
                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                sess.run(tf.global_variables_initializer())

                if args.restore_step:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path,
                                              args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s at commit: %s' %
                        (restore_path, commit),
                        slack=True)
                else:
                    log('Starting new training run at commit: %s' % commit,
                        slack=True)

                feeder.start_in_session(sess)

                while not coord.should_stop():
                    start_time = time.time()
                    model = models[0]

                    step, loss, opt, loss_wd, loss_pure_wd = sess.run([
                        global_step, cost, apply_gradient_op, model.loss,
                        cost_pure_wd
                    ])
                    feeder._batch_in_queue -= 1
                    log('feed._batch_in_queue: %s' %
                        str(feeder._batch_in_queue),
                        slack=True)

                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, loss_wd=%.05f, loss_pure_wd=%.05f]' % (
                        step, time_window.average, loss, loss_window.average,
                        loss_wd, loss_pure_wd)
                    log(message, slack=(step % args.checkpoint_interval == 0))

                    # If the gradient seems to explode, restore the previous
                    # checkpoint and continue from there.
                    if loss > 2 * loss_window.average or math.isnan(loss):
                        log('recover to the previous checkpoint')
                        #tf.reset_default_graph()
                        restore_step = int(
                            (step - 10) / args.checkpoint_interval
                        ) * args.checkpoint_interval
                        restore_path = '%s-%d' % (checkpoint_path,
                                                  restore_step)
                        saver.restore(sess, restore_path)
                        continue

                    if loss > 100 or math.isnan(loss):
                        log('Loss exploded to %.05f at step %d!' %
                            (loss, step),
                            slack=True)
                        raise Exception('Loss Exploded')

                    # Summaries are best-effort: log failures instead of the
                    # original bare `except: pass`, which silently swallowed
                    # every error (including KeyboardInterrupt).
                    try:
                        if step % args.summary_interval == 0:
                            log('Writing summary at step: %d' % step)
                            summary_writer.add_summary(sess.run(stats), step)
                    except Exception:
                        log('Failed to write summary at step: %d' % step)
                        traceback.print_exc()

                    if step % args.checkpoint_interval == 0:
                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        input_seq, spectrogram, alignment = sess.run([
                            model.inputs[0], model.linear_outputs[0],
                            model.alignments[0]
                        ])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        plot.plot_alignment(
                            alignment,
                            os.path.join(log_dir, 'step-%d-align.png' % step),
                            info='%s, %s, %s, step=%d, loss=%.5f' %
                            (args.model, commit, time_string(), step, loss))
                        log('Input: %s' % sequence_to_text(input_seq))

            except Exception as e:
                log('Exiting due to exception: %s' % e, slack=True)
                traceback.print_exc()
                coord.request_stop(e)
Beispiel #13
0
def train(log_dir, args):
    """Train a Tacotron model with stop-token prediction.

    Builds the data feeder and model graph, then runs the training loop,
    periodically writing summaries and checkpoints and synthesizing a sample
    waveform plus an alignment plot from the first example of the batch.

    Args:
        log_dir: directory for checkpoints, audio samples and alignment plots.
        args: parsed command-line arguments; must provide base_dir, input,
            summary_interval and checkpoint_interval.
    """
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    # Log the checkpoint and training-data paths for this run
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Initialize the model (this variant also feeds stop-token targets)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = Tacotron(hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets,
                         feeder.stop_token_targets, global_step)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    # Only the most recent checkpoint is kept
    saver = tf.train.Saver(max_to_keep=1)

    # Start training
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if step % args.summary_interval == 0:
                    summary_writer.add_summary(sess.run(stats), step)

                # Write a checkpoint every checkpoint_interval steps
                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # Evaluate the first example of the current batch
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    # Synthesize a sample waveform
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    time_string = datetime.now().strftime('%Y-%m-%d %H:%M')
                    # Plot the encoder-decoder alignment
                    infolog.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s,  %s, step=%d, loss=%.5f' %
                        (args.model, time_string, step, loss))
                    # Log the input text of the synthesized sample
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
Beispiel #14
0
def train(log_dir, args, trans_ckpt_dir=None):
    """Train a model, optionally restoring weights from a transfer checkpoint.

    Args:
        log_dir: directory for new checkpoints, audio samples and plots.
        args: parsed command-line arguments (base_dir, input, model, git,
            restore_step, summary_interval, checkpoint_interval).
        trans_ckpt_dir: optional directory holding a checkpoint to restore
            from (transfer learning). When None, restore (if requested) uses
            this run's own checkpoint path.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    # Bug fix: trans_checkpoint_path used to be assigned only when
    # trans_ckpt_dir was given, so the log()/restore below raised NameError
    # when it was None. Fall back to the run's own checkpoint path instead.
    if trans_ckpt_dir is not None:
        trans_checkpoint_path = os.path.join(trans_ckpt_dir, 'model.ckpt')
    else:
        trans_checkpoint_path = checkpoint_path

    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % trans_checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (trans_checkpoint_path,
                                          args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                # Abort early rather than wasting time on a diverged run
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    # New checkpoints are always saved under this run's own
                    # path, regardless of where we restored from.
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
Beispiel #15
0
def train(log_dir, args):
    """Train a model using a tf.data-style input pipeline built from a
    metadata file, checkpointing and synthesizing samples periodically.

    Args:
        log_dir: nominally the log directory, but see NOTE below.
        args: parsed command-line arguments (name, model, base_dir, input,
            data_dir, max_iter, checkpoint_interval, slack_url).
    """
    run_name = args.name or args.model

    # NOTE(review): the log_dir parameter is immediately overwritten here, so
    # the caller's value is ignored — confirm this is intentional.
    log_dir = os.path.join(args.base_dir, 'logs-%s' % run_name)
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'train.log'), run_name, args.slack_url)
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')

    # Metadata rows are pipe-delimited; sorting by column index 2 (presumably
    # a length field for bucketing — TODO confirm against the metadata format)
    with open(args.input, encoding='utf-8') as f:
        metadata = [row.strip().split('|') for row in f]
    metadata = sorted(metadata, key=lambda x: x[2])

    data_element = get_dataset(metadata, args.data_dir, hparams)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(data_element['input'], data_element['input_lengths'],
                         data_element['mel_targets'],
                         data_element['linear_targets'])
        model.add_loss()
        model.add_optimizer(global_step)

    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # NOTE(review): session is never closed (no `with` block / sess.close()).
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Fixed-iteration training loop (no Coordinator in this variant)
    for _ in range(int(args.max_iter)):

        start_time = time.time()
        step, mel_loss, lin_loss, loss, opt = sess.run([
            global_step, model.mel_loss, model.linear_loss, model.loss,
            model.optimize
        ])
        end_time = time.time()

        message = 'Step %7d [%.03f sec/step, loss = %.05f (mel : %.05f + lin : %.05f)]' % (
            step, end_time - start_time, loss, mel_loss, lin_loss)

        log(message)

        # Abort on divergence rather than continuing a wasted run
        if loss > 100 or math.isnan(loss):
            log('Loss exploded to %.05f at step %d!' % (loss, step))
            raise Exception('Loss Exploded')

        if step % args.checkpoint_interval == 0:
            log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
            saver.save(sess, checkpoint_path, global_step=step)

            log('Saving audio and alignment...')
            # Evaluate the first example of the current batch
            input_seq, spectrogram, alignment = sess.run([
                model.inputs[0], model.linear_outputs[0], model.alignments[0]
            ])
            waveform = audio.inv_spectrogram(spectrogram.T)
            audio.save_wav(waveform,
                           os.path.join(log_dir, 'step-%d-audio.wav' % step))
            plot.plot_alignment(alignment,
                                os.path.join(log_dir,
                                             'step-%d-align.png' % step),
                                info='%s, %s, step=%d, loss=%.5f' %
                                (args.model, time_string(), step, loss))

            log('Input: %s' % sequence_to_text(input_seq))
Beispiel #16
0
def train(log_dir, args):
    """Train an adversarial (GAN-style) model on paired positive/negative data.

    Each step first updates the discriminator, then the generator. On
    checkpoint steps, sample waveforms and alignment plots are written for
    both the positive and negative outputs.

    Args:
        log_dir: directory for checkpoints, audio samples and plots.
        args: parsed command-line arguments (git, base_dir, input_pos,
            input_neg, model, restore_step, checkpoint_interval).
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    # Input paths: one list of positive examples and one of negative examples
    input_path_pos = os.path.join(args.base_dir, args.input_pos)
    input_path_neg = os.path.join(args.base_dir, args.input_neg)

    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading positive training data from: %s' % input_path_pos)
    log('Loading negative training data from: %s' % input_path_neg)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path_pos, input_path_neg, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs_pos, feeder.input_lengths_pos,
                         feeder.mel_targets_pos, feeder.linear_targets_pos,
                         feeder.mel_targets_neg, feeder.linear_targets_neg,
                         feeder.labels_pos, feeder.labels_neg)
        model.add_loss()
        model.add_optimizer(global_step)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            # Summaries are disabled in this variant:
            #summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                # train d: discriminator update first
                sess.run(model.d_optimize)
                # train g: generator update, fetching all losses for logging
                step, rec_loss, style_loss, d_loss, g_loss, _ = sess.run([
                    global_step, model.rec_loss, model.style_loss,
                    model.d_loss, model.g_loss, model.g_optimize
                ])
                time_window.append(time.time() - start_time)
                message = 'Step %-7d [%.03f sec/step, rec_loss=%.05f, style_loss=%.05f, d_loss=%.05f, g_loss=%.05f]' % (
                    step, time_window.average, rec_loss, style_loss, d_loss,
                    g_loss)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # Evaluate the first example of the batch for both streams
                    input_seq, spectrogram_pos, spectrogram_neg, alignment_pos, alignment_neg = sess.run(
                        [
                            model.inputs[0], model.linear_outputs_pos[0],
                            model.linear_outputs_neg[0],
                            model.alignments_pos[0], model.alignments_neg[0]
                        ])

                    waveform_pos = audio.inv_spectrogram(spectrogram_pos.T)
                    waveform_neg = audio.inv_spectrogram(spectrogram_neg.T)
                    audio.save_wav(
                        waveform_pos,
                        os.path.join(log_dir, 'step-%d-audio_pos.wav' % step))
                    audio.save_wav(
                        waveform_neg,
                        os.path.join(log_dir, 'step-%d-audio_neg.wav' % step))
                    plot.plot_alignment(
                        alignment_pos,
                        os.path.join(log_dir, 'step-%d-align_pos.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, rec_loss))
                    plot.plot_alignment(
                        alignment_neg,
                        os.path.join(log_dir, 'step-%d-align_neg.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, rec_loss))
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
Beispiel #17
0
def testing():
    """Smoke-test the vocoder path: load a saved spectrogram from disk,
    invert it to a waveform, and write the result as a wav file."""
    mel = np.load('olli-mel-00001.npy')
    waveform = audio.inv_spectrogram(mel.T)
    audio.save_wav(waveform, 'test-audio.wav')
Beispiel #18
0
def train(log_dir, args):
    """Train a model whose encoder features come from a pretrained VGG19,
    uploading sample audio to Slack at each checkpoint.

    Args:
        log_dir: directory for checkpoints and audio samples.
        args: parsed command-line arguments (git, base_dir, input, model,
            vgg19_pretrained_model, restore_step, summary_interval,
            checkpoint_interval).
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as _:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as _:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, args.vgg19_pretrained_model,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    time_window = ValueWindow()
    loss_window = ValueWindow()
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            train_start_time = time.time()
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                checkpoint_saver = tf.train.import_meta_graph(
                    '%s.%s' % (restore_path, 'meta'))
                checkpoint_saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit))
            else:
                log('Starting new training run at commit: %s' % commit)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.summary_interval == 0))

                # Abort early rather than wasting time on a diverged run
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step))
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio...')
                    # Only the spectrogram is used; input/alignment fetches
                    # are discarded.
                    _, spectrogram, _ = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio_path = os.path.join(log_dir,
                                              'step-%d-audio.wav' % step)
                    audio.save_wav(waveform, audio_path)

                    infolog.upload_to_slack(audio_path, step)

                    # Report elapsed wall-clock time since training started.
                    # Renamed `min` -> `mins`: the original shadowed the
                    # built-in min() for the rest of the loop body.
                    time_so_far = time.time() - train_start_time
                    hrs, rest = divmod(time_so_far, 3600)
                    mins, secs = divmod(rest, 60)
                    log('{:.0f} hrs, {:.0f}mins and {:.1f}sec since the training process began'
                        .format(hrs, mins, secs))

                # External stop signal (e.g. a sentinel file or flag)
                if asked_to_stop(step):
                    coord.request_stop()

        except Exception as e:
            log('@channel: Exiting due to exception: %s' % e)
            traceback.print_exc()
            coord.request_stop(e)
Beispiel #19
0
def train(log_dir, args, input):
    """Train a Tacotron variant with optional EAL (explicit alignment locking).

    Supports several modes selected by args: plain GTA training, EAL training
    with locked alignments, alignment-only training (eal_trainAlign), joint
    training (eal_trainJoint), and fine-tuning from an EAL checkpoint. On
    checkpoint steps it synthesizes audio (linear-spectrogram or PML vocoder
    path), plots alignments for both a random batch item and a fixed sentence,
    and pushes audio/image summaries to TensorBoard.

    Args:
        log_dir: directory for checkpoints, audio samples and plots.
        args: parsed command-line arguments (git, base_dir, variant, eal_*,
            restore_step, num_steps, summary_interval, checkpoint_interval).
        input: training-data path relative to args.base_dir. NOTE: shadows
            the built-in input(); kept for interface compatibility.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.variant)
    log(hparams_debug_string())

    # Set up DataFeeder: the EAL feeder additionally supplies locked
    # alignments loaded from args.eal_dir.
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        if args.eal_dir:
            from tacotron.datafeeder import DataFeeder_EAL
            feeder = DataFeeder_EAL(coord, input_path, hparams, args.eal_dir)
        else:
            from tacotron.datafeeder import DataFeeder
            feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.variant, hparams)
        if args.eal_dir:
            model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                             feeder.linear_targets, feeder.pml_targets, is_training=True, 
                             eal=True, locked_alignments=feeder.locked_alignments, 
                             flag_trainAlign=args.eal_trainAlign, flag_trainJoint=args.eal_trainJoint, alignScale=args.eal_alignScale)
        else:
            model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                             feeder.linear_targets, feeder.pml_targets, is_training=True, 
                             gta=True)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model, eal_dir=args.eal_dir)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Set up fixed alignment synthesizer
    alignment_synth = AlignmentSynthesizer()

    # Set up text for synthesis
    fixed_sentence = 'Scientists at the CERN laboratory say they have discovered a new particle.'

    # Set up denormalisation parameters for synthesis (used by the PML
    # vocoder path; may be absent for linear-spectrogram models)
    mean_path = os.path.abspath(os.path.join(args.base_dir, input, '..', 'pml_data/mean.dat'))
    std_path = os.path.abspath(os.path.join(args.base_dir, input, '..', 'pml_data/std.dat'))
    log('Loading normalisation mean from: {}'.format(mean_path))
    log('Loading normalisation standard deviation from: {}'.format(std_path))
    mean_norm = None
    std_norm = None

    if os.path.isfile(mean_path) and os.path.isfile(std_path):
        mean_norm = np.fromfile(mean_path, 'float32')
        std_norm = np.fromfile(std_path, 'float32')

    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
            elif args.eal_dir and args.eal_ckpt:
                if args.eal_trainAlign or args.eal_trainJoint:
                    # Restore trainable weights plus batch-norm moving stats
                    list_var = tf.trainable_variables() + [v for v in tf.global_variables() if 'moving' in v.name]
                    saver_eal = tf.train.Saver(list_var)
                    saver_eal.restore(sess, args.eal_ckpt)
                    log('Loaded weights and batchNorm cache of checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
                elif args.eal_ft:
                    # Fine-tune: restore everything, including optimizer state
                    saver.restore(sess, args.eal_ckpt)
                    log('Refining the model from checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
                else:
                    # Warm start: restore all variables except optimizer state
                    list_var = [var for var in tf.global_variables() if 'optimizer' not in var.name]
                    saver_eal = tf.train.Saver(list_var)
                    saver_eal.restore(sess, args.eal_ckpt)
                    log('Initializing the weights from checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)

            feeder.start_in_session(sess)
            step = 0  # initialise step variable so can use in while condition

            while not coord.should_stop() and step <= args.num_steps:

                start_time = time.time()
                # The fetched losses (and the value pushed into loss_window)
                # depend on the training mode.
                if args.eal_trainAlign:
                    step, loss, loss_align, opt = sess.run([global_step, model.loss, model.loss_align, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss_align)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, loss_align=%.05f, avg_loss_align=%.05f]' % (
                        step, time_window.average, loss, loss_align, loss_window.average)
                elif args.eal_trainJoint:
                    step, loss, loss_align, loss_joint, opt = sess.run([global_step, model.loss, model.loss_align, 
                                                                        model.loss_joint, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss_joint)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, loss_align=%.05f, avg_loss_joint=%.05f]' % (
                        step, time_window.average, loss, loss_align, loss_window.average)
                else:
                    step, loss, opt = sess.run([global_step, model.loss, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                # Abort early rather than wasting time on a diverged run
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    summary_elements = []

                    # if the model has linear spectrogram features, use them to synthesize audio
                    if hasattr(model, 'linear_targets'):
                        input_seq, alignment, target_spectrogram, spectrogram = sess.run([
                            model.inputs[0], model.alignments[0], model.linear_targets[0], model.linear_outputs[0]])

                        output_waveform = audio.inv_spectrogram(spectrogram.T)
                        target_waveform = audio.inv_spectrogram(target_spectrogram.T)
                        audio.save_wav(output_waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        audio.save_wav(target_waveform, os.path.join(log_dir, 'step-%d-target-audio.wav' % step))
                    # otherwise, synthesize audio from PML vocoder features
                    elif hasattr(model, 'pml_targets'):
                        input_seq, alignment, target_pml_features, pml_features = sess.run([
                            model.inputs[0], model.alignments[0], model.pml_targets[0], model.pml_outputs[0]])

                        cfg = Configuration(hparams.sample_rate, hparams.pml_dimension)
                        synth = PMLSynthesizer(cfg)
                        output_waveform = synth.pml_to_wav(pml_features, mean_norm=mean_norm, std_norm=std_norm,
                                                           spec_type=hparams.spec_type)
                        target_waveform = synth.pml_to_wav(target_pml_features, mean_norm=mean_norm, std_norm=std_norm,
                                                           spec_type=hparams.spec_type)

                        sp.wavwrite(os.path.join(log_dir, 'step-%d-target-audio.wav' % step), target_waveform,
                                    hparams.sample_rate, norm_max_ifneeded=True)
                        sp.wavwrite(os.path.join(log_dir, 'step-%d-audio.wav' % step), output_waveform,
                                    hparams.sample_rate, norm_max_ifneeded=True)

                    # we need to adjust the output and target waveforms so the values lie in the interval [-1.0, 1.0]
                    # NOTE(review): if the model has neither linear_targets nor
                    # pml_targets, output_waveform is unbound here — confirm
                    # every supported variant defines one of the two.
                    output_waveform /= 1.05 * np.max(np.abs(output_waveform))
                    target_waveform /= 1.05 * np.max(np.abs(target_waveform))

                    summary_elements.append(
                        tf.summary.audio('ideal-%d' % step, np.expand_dims(target_waveform, 0), hparams.sample_rate),
                    )

                    summary_elements.append(
                        tf.summary.audio('sample-%d' % step, np.expand_dims(output_waveform, 0), hparams.sample_rate),
                    )

                    # get the alignment for the top sentence in the batch
                    random_attention_plot = plot.plot_alignment(alignment, os.path.join(log_dir,
                                                                                        'step-%d-random-align.png' % step),
                                                                info='%s, %s, %s, step=%d, loss=%.5f' % (
                                                                args.variant, commit, time_string(), step, loss))

                    summary_elements.append(
                        tf.summary.image('attention-%d' % step, random_attention_plot),
                    )

                    # also process the alignment for a fixed sentence for comparison
                    alignment_synth.load('%s-%d' % (checkpoint_path, step), hparams, model_name=args.variant)
                    fixed_alignment = alignment_synth.synthesize(fixed_sentence)
                    fixed_attention_plot = plot.plot_alignment(fixed_alignment,
                                                               os.path.join(log_dir, 'step-%d-fixed-align.png' % step),
                                                               info='%s, %s, %s, step=%d, loss=%.5f' % (
                                                               args.variant, commit, time_string(), step, loss))

                    summary_elements.append(
                        tf.summary.image('fixed-attention-%d' % step, fixed_attention_plot),
                    )

                    # save the audio and alignment to tensorboard (audio sample rate is hyperparameter)
                    merged = sess.run(tf.summary.merge(summary_elements))

                    summary_writer.add_summary(merged, step)

                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
Beispiel #20
0
def train(log_dir, args):
    """Run distributed (between-graph replicated) Tacotron training.

    Builds a tf.train.ClusterSpec from --ps_hosts/--worker_hosts, starts a
    tf.train.Server for this node, constructs the feeder and model under a
    replica_device_setter, and drives the training loop inside a
    tf.train.Supervisor managed session. Parameter-server nodes block in
    server.join() and never train.

    Args:
        log_dir: Directory for checkpoints, summaries, sample audio and
            alignment plots.
        args: Parsed command-line arguments; fields used here include
            git, model, input, base_dir, ps_hosts, worker_hosts, job_name,
            task_index, restore_step, summary_interval, checkpoint_interval.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Cluster layout comes in as comma-separated host:port lists.
    ps_hosts = args.ps_hosts.split(",")
    worker_hosts = args.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=args.job_name,
                             task_index=args.task_index)

    # Block further graph execution if current node is parameter server
    # (server.join() never returns, so a "ps" node only serves variables).
    if args.job_name == "ps":
        server.join()

    # Pin variables to the ps job and ops to this worker's device.
    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % args.task_index,
                cluster=cluster)):

        # Set up DataFeeder:
        coord = tf.train.Coordinator()
        with tf.variable_scope('datafeeder') as scope:
            feeder = DataFeeder(coord, input_path, hparams)

        # Set up model:
        global_step = tf.Variable(0, name='global_step', trainable=False)
        with tf.variable_scope('model') as scope:
            model = create_model(args.model, hparams)
            model.initialize(feeder.inputs, feeder.input_lengths,
                             feeder.mel_targets, feeder.linear_targets)
            model.add_loss()
            model.add_optimizer(global_step)
            stats = add_stats(model)

        # Bookkeeping:
        step = 0
        time_window = ValueWindow(100)  # rolling average of step durations
        loss_window = ValueWindow(100)  # rolling average of losses
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=2,
                               sharded=True)

        # NOTE(review): hooks is built but never passed to the Supervisor or
        # any session below — the StopAtStepHook has no effect. Confirm intent.
        hooks = [tf.train.StopAtStepHook(last_step=1000000)]
        # Train!
        # The Supervisor's managed session automatically resumes from its
        # own checkpoint in logdir, if one exists.
        is_chief = (args.task_index == 0)
        init_op = tf.global_variables_initializer()
        sv = tf.train.Supervisor(is_chief=(args.task_index == 0),
                                 logdir="train_logs",
                                 init_op=init_op,
                                 summary_op=stats,
                                 saver=saver,
                                 save_model_secs=600)
        with sv.managed_session(server.target) as sess:
            try:

                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                # NOTE(review): managed_session already ran init_op (or
                # restored from "train_logs"); re-running it here can clobber
                # variables the Supervisor just restored — confirm this is
                # intended.
                sess.run(init_op)

                if args.restore_step and is_chief:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path,
                                              args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s at commit: %s' %
                        (restore_path, commit),
                        slack=True)
                else:
                    log('Starting new training run at commit: %s' % commit,
                        slack=True)

                # Launch the background threads that enqueue training batches.
                feeder.start_in_session(sess)

                while not coord.should_stop():
                    start_time = time.time()
                    # One optimizer step; fetches the updated global step and
                    # loss in the same run call.
                    step, loss, opt = sess.run(
                        [global_step, model.loss, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                    log(message, slack=(step % args.checkpoint_interval == 0))

                    # Abort the run on divergence instead of wasting compute.
                    if loss > 100 or math.isnan(loss):
                        log('Loss exploded to %.05f at step %d!' %
                            (loss, step),
                            slack=True)
                        raise Exception('Loss Exploded')

                    if step % args.summary_interval == 0:
                        log('Writing summary at step: %d' % step)
                        summary_writer.add_summary(sess.run(stats), step)

                    # Only the chief worker writes checkpoints and samples.
                    if step % args.checkpoint_interval == 0 and is_chief:
                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        # Pull the first example of the batch for inspection.
                        input_seq, spectrogram, alignment = sess.run([
                            model.inputs[0], model.linear_outputs[0],
                            model.alignments[0]
                        ])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        plot.plot_alignment(
                            alignment,
                            os.path.join(log_dir, 'step-%d-align.png' % step),
                            info='%s, %s, %s, step=%d, loss=%.5f' %
                            (args.model, commit, time_string(), step, loss))
                        log('Input: %s' % sequence_to_text(input_seq))

            except Exception as e:
                # Log, print the traceback, and ask the coordinator to stop
                # the feeder threads with the exception attached.
                log('Exiting due to exception: %s' % e, slack=True)
                traceback.print_exc()
                coord.request_stop(e)
Beispiel #21
0
# Batch-invert saved linear spectrograms (.npy) back to waveforms (.wav).
# Earlier runs used other result roots (lj-training, bk2orig-asup step
# targets, and the gta / eal / eval variants of tacotron-bk2orig); point the
# template below at a different root to re-synthesise those instead.
dirFile_tmp = '/home/dawna/tts/qd212/models/tacotron/results/tacotron-bk2orig-eal-scratch/eval/%s/%s'

# Ensure the wav output directory exists before writing into it.
os.makedirs(dirFile_tmp % ('wav', ''), exist_ok=True)

# Utterances to vocode from their saved spectrograms.
name_list = [
    'LJ001-0073', 'LJ003-0229', 'LJ003-0296', 'LJ003-0304', 'LJ004-0208'
]
for name in name_list:
    # Load the linear spectrogram, invert it (Griffin-Lim via audio module),
    # and write the resulting waveform next to it under wav/.
    target_spectrogram = np.load(dirFile_tmp % ('npy', name + '.npy'))
    target_waveform = audio.inv_spectrogram(target_spectrogram.T)
    audio.save_wav(target_waveform, dirFile_tmp % ('wav', name + '.wav'))
Beispiel #22
0
def train(log_dir, args):
  """Run the single-process Tacotron training loop.

  Builds the data feeder and model graph, then repeatedly runs optimizer
  steps, periodically writing summaries and — every checkpoint interval —
  saving a checkpoint together with a sample waveform and alignment plot.

  Args:
    log_dir: Directory receiving checkpoints, summaries, audio and plots.
    args: Parsed command-line arguments (git, model, input, base_dir,
      restore_step, summary_interval, checkpoint_interval).
  """
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Input pipeline: feeder threads enqueue batches under a coordinator.
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Model graph, loss, optimizer, and summary ops.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping: rolling averages over the last 100 steps.
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Optional warm start from an explicit checkpoint step.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        started = time.time()
        # One optimizer step; global step and loss come back from the same run.
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - started)
        loss_window.append(loss)
        log('Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average),
          slack=(step % args.checkpoint_interval == 0))

        # Bail out on divergence rather than keep burning compute.
        if math.isnan(loss) or loss > 100:
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          # First example of the batch, for audible/visual inspection.
          input_seq, spectrogram, alignment = sess.run(
            [model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          audio.save_wav(audio.inv_spectrogram(spectrogram.T),
                         os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      # Log, dump the traceback, and stop the feeder threads with the error.
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
Beispiel #23
0
def train(log_dir, args):
  """Single-process training loop variant that also logs mel/linear losses.

  Differences from the sibling train() variants in this file: no git-commit
  bookkeeping, no loss-explosion guard, and plot_alignment is called with the
  decoded input text and istrain=1.

  Args:
    log_dir: Directory receiving checkpoints, summaries, audio and plots.
    args: Parsed command-line arguments (model, input, base_dir,
      restore_step, summary_interval, checkpoint_interval).
  """
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)

  # Input pipeline: feeder threads enqueue batches under a coordinator.
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Model graph, loss, optimizer, and summary ops.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping
  time_window = ValueWindow(100)  # rolling average of step durations
  loss_window = ValueWindow(100)  # rolling average of total losses
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Optional warm start from an explicit checkpoint step.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s' % (restore_path), slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        # One optimizer step; also fetches the mel and linear loss components
        # so they can be logged separately.
        step, loss, opt, mel_loss, linear_loss = \
          sess.run([global_step, model.loss, model.optimize, model.mel_loss, model.linear_loss])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, mel_loss=%.5f, linear_loss=%.5f]' % (
          step, time_window.average, loss, loss_window.average, mel_loss, linear_loss)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          # First example of the batch, for audible/visual inspection.
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          # Decode the input ids to text once and reuse it for plot and log.
          input_seq = sequence_to_text(input_seq)
          plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-align.png' % step), input_seq,
            info='%s, step=%d, loss=%.5f' % (args.model, step, loss), istrain=1)
          log('Input: %s' % input_seq)

    except Exception as e:
      # Log, dump the traceback, and stop the feeder threads with the error.
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)