Example #1
    def __init__(self, is_training=True):
        self.graph = tf.Graph()

        with self.graph.as_default():
            self.x, self.y, self.z, self.num_batch = get_batch(
                is_training=is_training)
            self.decoder_inputs = shift_by_one(self.y)

            # Encoder
            self.memory = encode(self.x, is_training=is_training)

            # Decoder
            self.outputs1 = decode1(self.decoder_inputs,
                                    self.memory)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            # L1 loss
            self.loss = tf.reduce_mean(tf.abs(self.outputs1 - self.y)) +\
                        tf.reduce_mean(tf.abs(self.outputs2 - self.z))

            if is_training:
                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()
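
A minimal sketch of how a Graph like this might be driven during training, assuming get_batch feeds data through TensorFlow input queues (so tf.train.Supervisor can start the queue runners); the logdir path and the logging interval are placeholders, not values from the original project:

import tensorflow as tf

# Hypothetical training loop for the Graph class above (a sketch, not
# the project's actual train.py).
g = Graph(is_training=True)
with g.graph.as_default():
    sv = tf.train.Supervisor(logdir="logdir", summary_op=None)
    with sv.managed_session() as sess:
        while not sv.should_stop():
            _, loss, step = sess.run([g.train_op, g.loss, g.global_step])
            if step % 100 == 0:  # log summaries periodically
                sv.summary_computed(sess, sess.run(g.merged))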
Example #2
    def __init__(self, is_training=True):
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(self.y)
            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs, self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                # Loss
                if hp.loss_type == "l1":  # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else:  # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)

                # Target masking
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
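
At evaluation time the placeholders above are fed step by step: because decoder_inputs is shift_by_one(self.y), the mel frames generated so far can be fed back through self.y. A sketch of that autoregressive loop, assuming a restored checkpoint; the dummy zero inputs, the 200-step cap, and the "logdir" path are illustrative stand-ins:

import numpy as np
import tensorflow as tf

g = Graph(is_training=False)
with g.graph.as_default():
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, tf.train.latest_checkpoint("logdir"))
        x = np.zeros((1, 50), np.int32)  # stand-in character ids
        y_hat = np.zeros((1, 200, hp.n_mels * hp.r), np.float32)
        for t in range(200):
            mel = sess.run(g.outputs1, {g.x: x, g.y: y_hat})
            y_hat[:, t, :] = mel[:, t, :]  # feed the new frame back in
        mag = sess.run(g.outputs2, {g.x: x, g.y: y_hat})  # linear magnitudes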
Example #3
    def __init__(self, is_training=True):
        self.graph = tf.Graph()

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
                self.decoder_inputs = shift_by_one(self.y)

                # The batch size was multiplied by the number of GPUs,
                # so we now split the mini-batch across them.
                self.x = tf.split(self.x, hp.num_gpus, 0)
                self.y = tf.split(self.y, hp.num_gpus, 0)
                self.z = tf.split(self.z, hp.num_gpus, 0)
                self.decoder_inputs = tf.split(self.decoder_inputs,
                                               hp.num_gpus, 0)

                # optimizer
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

                self.losses, self.grads_and_vars_list = [], []
                for i in range(hp.num_gpus):
                    with tf.variable_scope('net', reuse=bool(i)):
                        with tf.device('/gpu:{}'.format(i)):
                            with tf.name_scope('gpu_{}'.format(i)):
                                # Encoder
                                self.memory = encode(
                                    self.x[i],
                                    is_training=is_training)  # (N, T, E)

                                # Decoder
                                self.outputs1 = decode1(
                                    self.decoder_inputs[i],
                                    self.memory)  # (N, T', hp.n_mels*hp.r)
                                self.outputs2 = decode2(
                                    self.outputs1, is_training=is_training
                                )  # (N, T', (1+hp.n_fft//2)*hp.r)

                                # Loss
                                if hp.loss_type == "l1":  # L1 loss
                                    self.loss1 = tf.abs(self.outputs1 -
                                                        self.y[i])
                                    self.loss2 = tf.abs(self.outputs2 -
                                                        self.z[i])
                                else:  # L2 loss
                                    self.loss1 = tf.squared_difference(
                                        self.outputs1, self.y[i])
                                    self.loss2 = tf.squared_difference(
                                        self.outputs2, self.z[i])

                                # Target masking
                                if hp.target_zeros_masking:
                                    self.loss1 *= tf.to_float(
                                        tf.not_equal(self.y[i], 0.))
                                    self.loss2 *= tf.to_float(
                                        tf.not_equal(self.z[i], 0.))

                                self.mean_loss1 = tf.reduce_mean(self.loss1)
                                self.mean_loss2 = tf.reduce_mean(self.loss2)
                                self.mean_loss = self.mean_loss1 + self.mean_loss2

                                self.losses.append(self.mean_loss)
                                self.grads_and_vars = self.optimizer.compute_gradients(
                                    self.mean_loss)
                                self.grads_and_vars_list.append(
                                    self.grads_and_vars)

                with tf.device('/cpu:0'):
                    # Aggregate losses, then calculate average loss.
                    self.loss = tf.add_n(self.losses) / len(self.losses)

                    # Aggregate gradients, then calculate average gradients.
                    self.mean_grads_and_vars = []
                    for grads_and_vars in zip(*self.grads_and_vars_list):
                        grads = []
                        for grad, var in grads_and_vars:
                            if grad is not None:  # skip variables with no gradient
                                grads.append(tf.expand_dims(grad, 0))
                        mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                        self.mean_grads_and_vars.append((mean_grad, var))

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.train_op = self.optimizer.apply_gradients(
                    self.mean_grads_and_vars, self.global_step)

                # Summary
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()

            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.decoder_inputs = tf.placeholder(tf.float32,
                                                     shape=(None, None,
                                                            hp.n_mels * hp.r))

                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(self.decoder_inputs,
                                        self.memory)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
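
The gradient averaging in this example hinges on zip(*self.grads_and_vars_list) transposing the per-GPU lists of (grad, var) pairs into per-variable tuples. A toy, framework-free illustration of that transpose-then-average pattern:

# Each GPU contributes one (grad, var) pair per variable.
gpu0 = [(1.0, "w"), (2.0, "b")]
gpu1 = [(3.0, "w"), (4.0, "b")]

mean_grads_and_vars = []
for grads_and_vars in zip(gpu0, gpu1):  # one tuple per variable
    grads = [g for g, _ in grads_and_vars]
    var = grads_and_vars[0][1]
    mean_grads_and_vars.append((sum(grads) / len(grads), var))

print(mean_grads_and_vars)  # [(2.0, 'w'), (3.0, 'b')]

Note that compute_gradients returns None for variables the loss does not reach, which is why the "if grad is not None" guard (also present in example #6) is needed before tf.expand_dims.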
Example #4
    def __init__(self, is_training=True):
        self.graph = tf.Graph()

        with self.graph.as_default():
            # Build vocab
            if is_training:
                _, idx2char = learn_vocab()
                store_vocab(idx2char)

            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs, self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                # Loss
                if hp.loss_type == "l1":  # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else:  # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)

                # Target masking
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Logging
                ## histograms
                self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1),
                                                  0)
                self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1),
                                             0)

                self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1),
                                                  0)
                self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1),
                                             0)

                ## images
                self.expected1_i = tf.expand_dims(
                    tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
                self.got1_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

                self.expected2_i = tf.expand_dims(
                    tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
                self.got2_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)

                tf.summary.histogram('expected_values1', self.expected1_h)
                tf.summary.histogram('gotten_values1', self.got1_h)
                tf.summary.histogram('expected_values2', self.expected2_h)
                tf.summary.histogram('gotten_values2', self.got2_h)

                tf.summary.image("expected_values1", self.expected1_i * 255)
                tf.summary.image("gotten_values1", self.got1_i * 255)
                tf.summary.image("expected_values2", self.expected2_i * 255)
                tf.summary.image("gotten_values2", self.got2_i * 255)

                self.merged = tf.summary.merge_all()
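
The image summaries above work because tf.summary.image expects a rank-4 (N, H, W, C) batch: slicing with [:1], averaging the last axis with keep_dims=True, and then expand_dims(..., 1) turn the first example in the batch into a one-pixel-tall grayscale strip. A quick shape check with arbitrary concrete sizes:

import tensorflow as tf

y = tf.zeros([32, 100, 400])  # (N, T', hp.n_mels*hp.r), sizes arbitrary
strip = tf.expand_dims(tf.reduce_mean(y[:1], -1, keep_dims=True), 1)
print(strip.shape)  # (1, 1, 100, 1), i.e. (N, H, W, C) for tf.summary.image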
Example #5
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        
        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.z, self.num_batch = get_batch()
            else: # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, None))
                self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))

            self.decoder_inputs = shift_by_one(self.y)
            
            with tf.variable_scope("net"):
                # Encoder
                self.memory = encode(self.x, is_training=is_training) # (N, T, E)
                
                # Decoder 
                self.outputs1 = decode1(self.decoder_inputs, 
                                         self.memory,
                                         is_training=is_training) # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)
             
            if is_training:  
                # Loss
                if hp.loss_type=="l1": # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.y)
                    self.loss2 = tf.abs(self.outputs2 - self.z)
                else: # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1, self.y)
                    self.loss2 = tf.squared_difference(self.outputs2, self.z)
                
                # Target masking
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))
                
                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2 
                
                # Logging  
                ## histograms
                self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
                self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1),0)
                
                self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
                self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1),0)
                
                ## images
                self.expected1_i = tf.expand_dims(tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
                self.got1_i = tf.expand_dims(tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
                
                self.expected2_i = tf.expand_dims(tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
                self.got2_i = tf.expand_dims(tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)
                                                
                # Training Scheme
                self.global_step = tf.Variable(0, name='global_step', trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(self.mean_loss, global_step=self.global_step)
                   
                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)
                
                tf.summary.histogram('expected_values1', self.expected1_h)
                tf.summary.histogram('gotten_values1', self.got1_h)
                tf.summary.histogram('expected_values2', self.expected2_h)
                tf.summary.histogram('gotten_values2', self.got2_h)
                                
                tf.summary.image("expected_values1", self.expected1_i*255)
                tf.summary.image("gotten_values1", self.got1_i*255)
                tf.summary.image("expected_values2", self.expected2_i*255)
                tf.summary.image("gotten_values2", self.got2_i*255)
                
                self.merged = tf.summary.merge_all()
Example #6
 def __init__(self, is_training=True):
     self.graph = tf.Graph()
     
     with self.graph.as_default():
         if is_training:
             self.x, self.y, self.z, self.num_batch = get_batch()
             self.decoder_inputs = shift_by_one(self.y)

             # Sequence lengths for masking, computed while self.x is
             # still a single tensor (i.e. before the per-GPU split below)
             self.x_lengths = tf.to_int32(tf.reduce_sum(tf.sign(tf.abs(self.x)), -1)) # (N,)
             self.x_masks = tf.to_float(tf.expand_dims(tf.sign(tf.abs(self.x)), -1)) # (N, T, 1)

             # The batch size was multiplied by the number of GPUs,
             # so we now split the mini-batch across them.
             self.x = tf.split(self.x, hp.num_gpus, 0)
             self.y = tf.split(self.y, hp.num_gpus, 0)
             self.z = tf.split(self.z, hp.num_gpus, 0)
             self.decoder_inputs = tf.split(self.decoder_inputs, hp.num_gpus, 0)

             # Optimizer
             self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
         
             self.losses, self.grads_and_vars_list = [], []
             for i in range(hp.num_gpus):
                 with tf.variable_scope('net', reuse=bool(i)):
                     with tf.device('/gpu:{}'.format(i)):
                         with tf.name_scope('gpu_{}'.format(i)):
                             # Encoder
                             self.memory = encode(self.x[i], is_training=is_training) # (N, T, E)
                             
                             # Decoder 
                             self.outputs1 = decode1(self.decoder_inputs[i], 
                                                      self.memory,
                                                      is_training=is_training) # (N, T', hp.n_mels*hp.r)
                             self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)
           
                             # Loss
                             if hp.loss_type=="l1": # L1 loss
                                 self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                 self.loss2 = tf.abs(self.outputs2 - self.z[i])
                             else: # L2 loss
                                 self.loss1 = tf.squared_difference(self.outputs1, self.y[i])
                                 self.loss2 = tf.squared_difference(self.outputs2, self.z[i])
                                 
                             # Target masking
                             if hp.target_zeros_masking:
                                 self.loss1 *= tf.to_float(tf.not_equal(self.y[i], 0.))
                                 self.loss2 *= tf.to_float(tf.not_equal(self.z[i], 0.))
                             
                             self.loss1 = tf.reduce_mean(self.loss1)
                             self.loss2 = tf.reduce_mean(self.loss2)
                             self.loss = self.loss1 + self.loss2   
                             
                             self.losses.append(self.loss)
                             self.grads_and_vars = self.optimizer.compute_gradients(self.loss) 
                             self.grads_and_vars_list.append(self.grads_and_vars)    
             
             with tf.device('/cpu:0'):
                 # Aggregate losses, then calculate average loss.
                 self.mean_loss = tf.add_n(self.losses) / len(self.losses)
                  
                 # Aggregate gradients, then calculate average gradients.
                 self.mean_grads_and_vars = []
                 for grads_and_vars in zip(*self.grads_and_vars_list):
                     grads = []
                     for grad, var in grads_and_vars:
                         if grad is not None:
                             grads.append(tf.expand_dims(grad, 0))
                     mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                     self.mean_grads_and_vars.append((mean_grad, var))
              
             # Training Scheme
             self.global_step = tf.Variable(0, name='global_step', trainable=False)
             self.train_op = self.optimizer.apply_gradients(self.mean_grads_and_vars, self.global_step)
              
             # Summary
             tf.summary.scalar('mean_loss', self.mean_loss)
             self.merged = tf.summary.merge_all()
             
         else: # Evaluation
             self.x = tf.placeholder(tf.int32, shape=(None, None))
             self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))
             self.decoder_inputs = shift_by_one(self.y)
             with tf.variable_scope('net'):
                 # Encoder
                 self.memory = encode(self.x, is_training=is_training) # (N, T, E)
                  
                 # Decoder
                 self.outputs1 = decode1(self.decoder_inputs, self.memory, is_training=is_training) # (N, T', hp.n_mels*hp.r)
                 self.outputs2 = decode2(self.outputs1, is_training=is_training) # (N, T', (1+hp.n_fft//2)*hp.r)
Example #7
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.x, self.q, self.y, self.z, self.num_batch = get_batch()
            else:  # Evaluation
                # NOTE: the code below reads self.q and self.z, so the
                # evaluation placeholders are defined under those names;
                # the shapes are assumed to mirror the training batch.
                self.q = tf.placeholder(tf.float32,
                                        shape=(None, None, hp.n_mels * hp.r))
                self.z = tf.placeholder(tf.float32,
                                        shape=(None, None,
                                               (1 + hp.n_fft // 2) * hp.r))

            with tf.variable_scope("Generator"):
                # Encoder
                self.memory_gen = encode(self.q,
                                         is_training=is_training)  # (N, T, E)

                # Decoder
                decode_length = int(
                    (hp.bin_size_y[1] * hp.sr - (hp.win_length - 1)) /
                    ((hp.hop_length) * hp.r))  # about 50
                self._outputs1_gen = tf.zeros(
                    [hp.batch_size, 1, hp.n_mels * hp.r])
                outputs1_gen_list = []
                for j in range(decode_length):
                    reuse = None if j == 0 else True
                    self._outputs1_gen += decode1(self._outputs1_gen,
                                                  self.memory_gen,
                                                  is_training=is_training,
                                                  reuse=reuse)
                    outputs1_gen_list.append(self._outputs1_gen)
                self.outputs1_gen = tf.concat(outputs1_gen_list, 1)
                self.outputs2_gen = decode2(self.outputs1_gen,
                                            is_training=is_training)

            with tf.variable_scope("Discriminator"):
                self.final_state_real = encode_dis(self.z,
                                                   is_training=is_training)
                self.final_state_fake = encode_dis(self.outputs2_gen,
                                                   is_training=is_training,
                                                   reuse=True)

            if is_training:
                # Discriminator Loss
                self.dis_loss_real = tf.reduce_mean(
                    tf.squared_difference(self.final_state_real, 1))
                self.dis_loss_fake = tf.reduce_mean(
                    tf.squared_difference(self.final_state_fake, 0))
                self.dis_loss = tf.reduce_mean(self.dis_loss_real +
                                               self.dis_loss_fake)

                # Generator Loss
                self.gen_loss = tf.reduce_mean(
                    tf.squared_difference(self.final_state_fake, 1))

                # Training Scheme
                dvars = [
                    e for e in self.graph.get_collection('trainable_variables')
                    if 'Discriminator' in e.name
                ]
                gvars = [
                    e for e in self.graph.get_collection('trainable_variables')
                    if 'Generator' in e.name
                ]

                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

                grad_d, var_d = zip(*self.optimizer.compute_gradients(
                    self.dis_loss, var_list=dvars))
                grad_d_clipped, _ = tf.clip_by_global_norm(grad_d, 5.)
                grad_g, var_g = zip(*self.optimizer.compute_gradients(
                    self.gen_loss, var_list=gvars))
                grad_g_clipped, _ = tf.clip_by_global_norm(grad_g, 5.)
                self.train_op_dis = self.optimizer.apply_gradients(
                    zip(grad_d_clipped, var_d))
                self.train_op_gen = self.optimizer.apply_gradients(
                    zip(grad_g_clipped, var_g))

                # Increments global step
                self.inc = tf.assign_add(self.global_step, 1, name='increment')

                # Profiling
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                # Summary
                tf.summary.scalar('dis_loss_real', self.dis_loss_real)
                tf.summary.scalar('dis_loss_fake', self.dis_loss_fake)
                tf.summary.scalar('dis_loss', self.dis_loss)
                tf.summary.scalar('gen_loss', self.gen_loss)

                tf.summary.scalar('step', self.inc)

                self.merged = tf.summary.merge_all()
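
Since neither apply_gradients call above passes global_step, the step is advanced through self.inc; note that the 'step' scalar summary depends on self.inc, so fetching it together with self.merged in a single sess.run still executes the increment exactly once. A sketch under the same queue-runner assumptions as before, with "logdir" and the logging interval as placeholders:

g = Graph(is_training=True)
with g.graph.as_default():
    sv = tf.train.Supervisor(logdir="logdir", summary_op=None)
    with sv.managed_session() as sess:
        while not sv.should_stop():
            sess.run(g.train_op_dis)  # one Discriminator update
            sess.run(g.train_op_gen)  # one Generator update
            step, summary = sess.run([g.inc, g.merged])  # advances global_step once
            if step % 100 == 0:
                sv.summary_computed(sess, summary, global_step=step)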
Example #8
File: train.py  Project: DiyuanLu/tacotron
    def __init__(self, is_training=True):
        self.graph = tf.Graph()

        with self.graph.as_default():

            if is_training:
                # (batch_size, ?, 258), (batch_size, ?, 400), (batch_size, ?, 5125)
                self.spectro, self.magnit, self.length, self.num_batch = \
                    get_batch()  # Get a data batch
            else:  # Evaluation
                self.length = tf.placeholder(tf.int32, shape=(None, None))
                self.spectro = tf.placeholder(tf.float32,
                                              shape=(None, None,
                                                     hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(
                self.spectro)  # this is the decoder's input

            with tf.variable_scope("net"):
                #### Encoder
                self.memory = net.encode(self.spectro,
                                         is_training=is_training)  # (N, T, E)

                #### Decoder
                self.outputs1 = net.decode1(
                    self.decoder_inputs,
                    self.memory,  # encoder RNN output
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = net.decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

            if is_training:
                #### Loss
                if hp.loss_type == "l1":  # L1 loss
                    self.loss1 = tf.abs(self.outputs1 - self.spectro)
                    self.loss2 = tf.abs(self.outputs2 - self.magnit)
                else:  # L2 loss
                    self.loss1 = tf.squared_difference(self.outputs1,
                                                       self.spectro)
                    self.loss2 = tf.squared_difference(self.outputs2,
                                                       self.magnit)

                # Target masking
                ### mask the loss with shape of the input length
                if hp.target_zeros_masking:
                    self.loss1 *= tf.to_float(tf.not_equal(self.spectro, 0.))
                    self.loss2 *= tf.to_float(tf.not_equal(self.magnit, 0.))

                self.mean_loss1 = tf.reduce_mean(self.loss1)
                self.mean_loss2 = tf.reduce_mean(self.loss2)
                self.mean_loss = self.mean_loss1 + self.mean_loss2

                # Logging
                ## histograms
                self.expected1_h = tf.reduce_mean(
                    tf.reduce_mean(self.spectro, -1), 0)
                self.got1_h = tf.reduce_mean(tf.reduce_mean(self.outputs1, -1),
                                             0)

                self.expected2_h = tf.reduce_mean(
                    tf.reduce_mean(self.magnit, -1), 0)
                self.got2_h = tf.reduce_mean(tf.reduce_mean(self.outputs2, -1),
                                             0)

                ## images
                self.expected1_i = tf.expand_dims(
                    tf.reduce_mean(self.spectro[:1], -1, keep_dims=True), 1)
                self.got1_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)

                self.expected2_i = tf.expand_dims(
                    tf.reduce_mean(self.magnit[:1], -1, keep_dims=True), 1)
                self.got2_i = tf.expand_dims(
                    tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss1', self.mean_loss1)
                tf.summary.scalar('mean_loss2', self.mean_loss2)
                tf.summary.scalar('mean_loss', self.mean_loss)

                tf.summary.histogram('expected_values1', self.expected1_h)
                tf.summary.histogram('gotten_values1', self.got1_h)
                tf.summary.histogram('expected_values2', self.expected2_h)
                tf.summary.histogram('gotten_values2', self.got2_h)

                tf.summary.image("expected_values1", self.expected1_i * 255)
                tf.summary.image("gotten_values1", self.got1_i * 255)
                tf.summary.image("expected_values2", self.expected2_i * 255)
                tf.summary.image("gotten_values2", self.got2_i * 255)

                self.merged = tf.summary.merge_all()