def __init__(self, is_training=True):
    """Assemble the model graph and, when training, its optimisation ops.

    Args:
        is_training: Passed through to `get_batch`, `encode` and `decode2`.
            When true, the global step, Adam optimizer, train op and the
            merged summary are created in addition to the forward pass.
    """
    graph = tf.Graph()
    self.graph = graph
    with graph.as_default():
        batch = get_batch(is_training=is_training)
        self.x, self.y, self.z, self.num_batch = batch
        self.decoder_inputs = shift_by_one(self.y)

        # Encoder
        self.memory = encode(self.x, is_training=is_training)

        # Decoder: the second stage consumes the output of the first.
        self.outputs1 = decode1(self.decoder_inputs, self.memory)
        self.outputs2 = decode2(self.outputs1, is_training=is_training)

        # L1 loss: mean absolute error of each decoder stage, summed.
        stage1_error = tf.reduce_mean(tf.abs(self.outputs1 - self.y))
        stage2_error = tf.reduce_mean(tf.abs(self.outputs2 - self.z))
        self.loss = stage1_error + stage2_error

        if not is_training:
            return

        # Training scheme
        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step)

        # Summary
        tf.summary.scalar('loss', self.loss)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the network graph, plus loss/optimizer/summaries when training.

    Args:
        is_training: When true, inputs come from `get_batch()` and the loss,
            Adam train op and merged summary are created. Otherwise `x` and
            `y` are placeholders and only the forward pass is built.
    """

    def _abs_diff(pred, target):
        # Element-wise L1 distance.
        return tf.abs(pred - target)

    self.graph = tf.Graph()
    with self.graph.as_default():
        if not is_training:
            # Evaluation: inputs are fed through placeholders.
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))
        else:
            self.x, self.y, self.z, self.num_batch = get_batch()

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(
                self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if not is_training:
            return

        # Loss: "l1" selects absolute error, anything else squared error.
        distance = _abs_diff if hp.loss_type == "l1" \
            else tf.squared_difference
        self.loss1 = distance(self.outputs1, self.y)
        self.loss2 = distance(self.outputs2, self.z)

        # Target masking: zero the loss wherever the target is exactly 0.
        if hp.target_zeros_masking:
            keep1 = tf.to_float(tf.not_equal(self.y, 0.))
            self.loss1 = self.loss1 * keep1
            keep2 = tf.to_float(tf.not_equal(self.z, 0.))
            self.loss2 = self.loss2 * keep2

        self.mean_loss1 = tf.reduce_mean(self.loss1)
        self.mean_loss2 = tf.reduce_mean(self.loss2)
        self.mean_loss = self.mean_loss1 + self.mean_loss2

        # Training scheme
        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(
            self.mean_loss, global_step=self.global_step)

        # Summary
        scalars = (
            ('mean_loss1', self.mean_loss1),
            ('mean_loss2', self.mean_loss2),
            ('mean_loss', self.mean_loss),
        )
        for tag, value in scalars:
            tf.summary.scalar(tag, value)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the multi-GPU training graph or the single-device eval graph.

    In training mode the batch from `get_batch()` is split into
    `hp.num_gpus` shards, one tower is built per GPU with shared variables
    under the `'net'` scope, and the per-tower losses and gradients are
    averaged on the CPU before being applied by a single Adam optimizer.

    In evaluation mode `x` and `decoder_inputs` are placeholders and only
    the forward pass is built.

    Args:
        is_training: Selects the training or the evaluation graph.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Note that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(
                self.decoder_inputs, hp.num_gpus, 0)

            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # reuse=bool(i): tower 0 creates the variables, the
                # remaining towers share them.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(
                                self.x[i],
                                is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(
                                self.decoder_inputs[i],
                                self.memory)  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(
                                self.outputs1, is_training=is_training
                            )  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(
                                    self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(
                                    self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(
                                    self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(
                                    self.outputs2, self.z[i])

                            # Target masking
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(
                                    tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(
                                    tf.not_equal(self.z[i], 0.))

                            self.mean_loss1 = tf.reduce_mean(self.loss1)
                            self.mean_loss2 = tf.reduce_mean(self.loss2)
                            self.mean_loss = (
                                self.mean_loss1 + self.mean_loss2)

                            self.losses.append(self.mean_loss)
                            self.grads_and_vars = (
                                self.optimizer.compute_gradients(
                                    self.mean_loss))
                            self.grads_and_vars_list.append(
                                self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for tower_pairs in zip(*self.grads_and_vars_list):
                    # compute_gradients yields None for a variable the loss
                    # does not depend on; expand_dims(None) would raise.
                    grads = [
                        tf.expand_dims(grad, 0)
                        for grad, _ in tower_pairs
                        if grad is not None
                    ]
                    if not grads:
                        # No tower produced a gradient for this variable.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    _, var = tower_pairs[-1]
                    self.mean_grads_and_vars.append((mean_grad, var))

            # Training Scheme
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.train_op = self.optimizer.apply_gradients(
                self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('loss', self.loss)
            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.decoder_inputs = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

            # Build under the same 'net' scope as the training towers so the
            # variable names match between the two graphs.
            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(
                    self.x, is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs,
                    self.memory)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training
                )  # (N, T', (1+hp.n_fft//2)*hp.r)
def __init__(self, is_training=True):
    """Build the graph; in training mode also vocab, loss, logging, train op.

    Args:
        is_training: When true, the vocabulary is learned and stored, inputs
            come from `get_batch()`, and the loss, logging tensors, Adam
            train op and merged summary are created. Otherwise `x` and `y`
            are placeholders and only the forward pass is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # Build vocab
            _, idx2char = learn_vocab()
            store_vocab(idx2char)

            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(
                self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: drop positions whose target is exactly zero.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(
                tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(
                tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first sample only, last axis averaged to one channel
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training Scheme
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            # Tag uses an underscore like its siblings; a space is not a
            # valid summary-name character.
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the graph; in training mode also loss, logging and train op.

    Args:
        is_training: When true, inputs come from `get_batch()` and the loss,
            logging tensors, Adam train op and merged summary are created.
            Otherwise `x` and `y` are placeholders and only the forward pass
            is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(
                self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: drop positions whose target is exactly zero.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(
                tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(
                tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first sample only, last axis averaged to one channel
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training Scheme
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            # Tag uses an underscore like its siblings; a space is not a
            # valid summary-name character.
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the dual-encoder graph, plus loss/optimizer when training.

    Two encoders (`encoder1` over `x1`, `encoder2` over `x2`) feed
    `dual_decode1`; the alignment histories of both attention states are
    exposed as `alignment_history1` / `alignment_history2`.

    Args:
        is_training: When true, inputs come from `get_dual_source_batch()`
            and the loss, exponentially decayed Adam train op and merged
            summary are created. Otherwise `x1`, `x2` and `y` are
            placeholders and only the forward pass is built.
    """

    def _abs_diff(pred, target):
        # Element-wise L1 distance.
        return tf.abs(pred - target)

    def _batch_major(history):
        # Stacked history is (decoder_timestep, batch_size, memory_size);
        # return it as (batch_size, memory_size, decoder_timestep).
        stacked = history.stack()
        return tf.transpose(stacked, perm=[1, 2, 0])

    self.graph = tf.Graph()
    with self.graph.as_default():
        if not is_training:
            # Evaluation: inputs are fed through placeholders.
            self.x1 = tf.placeholder(tf.int32, shape=(None, None))
            self.x2 = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))
        else:
            batch = get_dual_source_batch()
            self.x1, self.x2, self.y, self.z, self.num_batch = batch

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder 1
            self.memory1 = encode_vocab(
                self.x1, char2idx_kana, idx2char_kana,
                is_training=is_training,
                scope="encoder1")  # (N, T, E)

            # Encoder 2
            self.memory2 = encode_vocab(
                self.x2, phone2idx, idx2phone,
                is_training=is_training,
                scope="encoder2")  # (N, T, E)

            # Decoder
            decoded = dual_decode1(
                self.decoder_inputs, self.memory1, self.memory2,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs1, final_state = decoded

            self.alignment_history1 = _batch_major(
                final_state.state1_alignment_history)
            self.alignment_history2 = _batch_major(
                final_state.state2_alignment_history)

            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if not is_training:
            return

        # Loss: "l1" selects absolute error, anything else squared error.
        distance = _abs_diff if hp.loss_type == "l1" \
            else tf.squared_difference
        self.loss1 = distance(self.outputs1, self.y)
        self.loss2 = distance(self.outputs2, self.z)

        # Target masking: zero the loss wherever the target is exactly 0.
        if hp.target_zeros_masking:
            keep1 = tf.to_float(tf.not_equal(self.y, 0.))
            self.loss1 = self.loss1 * keep1
            keep2 = tf.to_float(tf.not_equal(self.z, 0.))
            self.loss2 = self.loss2 * keep2

        self.mean_loss1 = tf.reduce_mean(self.loss1)
        self.mean_loss2 = tf.reduce_mean(self.loss2)
        self.mean_loss = self.mean_loss1 + self.mean_loss2

        # Training scheme: Adam with a smoothly decayed learning rate.
        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)
        decayed_lr = tf.train.exponential_decay(
            learning_rate=hp.lr,
            global_step=self.global_step,
            decay_steps=hp.decay_step,
            decay_rate=hp.decay_rate,
            staircase=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=decayed_lr)
        self.train_op = self.optimizer.minimize(
            self.mean_loss, global_step=self.global_step)

        # Summary
        scalars = (
            ('mean_loss1', self.mean_loss1),
            ('mean_loss2', self.mean_loss2),
            ('mean_loss', self.mean_loss),
        )
        for tag, value in scalars:
            tf.summary.scalar(tag, value)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the multi-GPU training graph or the single-device eval graph.

    In training mode the batch from `get_batch()` is split into
    `hp.num_gpus` shards, one tower is built per GPU with shared variables
    under the `'net'` scope, and the per-tower losses and gradients are
    averaged on the CPU before being applied by a single Adam optimizer.

    In evaluation mode `x` and `y` are placeholders and only the forward
    pass is built, under the same `'net'` scope.

    Args:
        is_training: Selects the training or the evaluation graph.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Make sure that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(
                self.decoder_inputs, hp.num_gpus, 0)

            # Sequence lengths for masking
            # NOTE(review): self.x is already the list of per-GPU splits
            # here, so these look like they carry an extra leading axis
            # rather than the (N,) / (N, T, 1) shapes the original comments
            # state — confirm before relying on them.
            self.x_lengths = tf.to_int32(
                tf.reduce_sum(tf.sign(tf.abs(self.x)), -1))
            self.x_masks = tf.to_float(
                tf.expand_dims(tf.sign(tf.abs(self.x)), -1))

            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # reuse=bool(i): tower 0 creates the variables, the
                # remaining towers share them.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(
                                self.x[i],
                                is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(
                                self.decoder_inputs[i], self.memory,
                                is_training=is_training
                            )  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(
                                self.outputs1, is_training=is_training
                            )  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(
                                    self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(
                                    self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(
                                    self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(
                                    self.outputs2, self.z[i])

                            # Target masking
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(
                                    tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(
                                    tf.not_equal(self.z[i], 0.))

                            self.loss1 = tf.reduce_mean(self.loss1)
                            self.loss2 = tf.reduce_mean(self.loss2)
                            self.loss = self.loss1 + self.loss2

                            self.losses.append(self.loss)
                            self.grads_and_vars = (
                                self.optimizer.compute_gradients(self.loss))
                            self.grads_and_vars_list.append(
                                self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.mean_loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for tower_pairs in zip(*self.grads_and_vars_list):
                    grads = [
                        tf.expand_dims(grad, 0)
                        for grad, _ in tower_pairs
                        if grad is not None
                    ]
                    if not grads:
                        # Every tower returned None for this variable;
                        # tf.concat on an empty list would raise, so the
                        # variable is simply left out of the update.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    _, var = tower_pairs[-1]
                    self.mean_grads_and_vars.append((mean_grad, var))

            # Training Scheme
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.train_op = self.optimizer.apply_gradients(
                self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(
                    self.x, is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs, self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training
                )  # (N, T', (1+hp.n_fft//2)*hp.r)
def __init__(self, is_training=True):
    """Build a generator/discriminator graph with least-squares GAN losses.

    The generator encodes `q`, then runs `decode1` repeatedly, feeding each
    accumulated output back in, and maps the concatenated result through
    `decode2`. The discriminator (`encode_dis`) scores the real target `z`
    and the generated output with shared variables.

    Args:
        is_training: When true, inputs come from `get_batch()` and the
            losses, clipped-gradient train ops, step increment and merged
            summary are created.

    NOTE(review): the evaluation branch defines only `x` and `y`, while the
    generator and discriminator read `self.q` and `self.z`; as written,
    `is_training=False` raises AttributeError. The intended evaluation
    inputs cannot be determined from this block.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.q, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))
            self.y = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        with tf.variable_scope("Generator"):
            # Encoder
            self.memory_gen = encode(
                self.q, is_training=is_training)  # (N, T, E)

            # Decoder
            decode_length = int(
                (hp.bin_size_y[1] * hp.sr - (hp.win_length - 1)) /
                ((hp.hop_length) * hp.r))  # about 50

            # Seed frame of zeros; each step adds the decoder output to the
            # running frame and records the result.
            self._outputs1_gen = tf.zeros(
                [hp.batch_size, 1, hp.n_mels * hp.r])
            outputs1_gen_list = []
            for j in range(decode_length):
                # Variables are created on the first step and reused after.
                reuse = None if j == 0 else True
                self._outputs1_gen += decode1(
                    self._outputs1_gen,
                    self.memory_gen,
                    is_training=is_training,
                    reuse=reuse)
                outputs1_gen_list.append(self._outputs1_gen)

            self.outputs1_gen = tf.concat(outputs1_gen_list, 1)
            self.outputs2_gen = decode2(
                self.outputs1_gen, is_training=is_training)

        with tf.variable_scope("Discriminator"):
            self.final_state_real = encode_dis(
                self.z, is_training=is_training)
            self.final_state_fake = encode_dis(
                self.outputs2_gen, is_training=is_training, reuse=True)

        if is_training:
            # Discriminator loss: push real scores to 1 and fake scores to 0.
            self.dis_loss_real = tf.reduce_mean(
                tf.squared_difference(self.final_state_real, 1))
            self.dis_loss_fake = tf.reduce_mean(
                tf.squared_difference(self.final_state_fake, 0))
            self.dis_loss = tf.reduce_mean(
                self.dis_loss_real + self.dis_loss_fake)

            # Generator loss: push fake scores to 1.
            self.gen_loss = tf.reduce_mean(
                tf.squared_difference(self.final_state_fake, 1))

            # Training Scheme: each loss only updates its own sub-network,
            # selected by scope name.
            dvars = [
                e for e in self.graph.get_collection('trainable_variables')
                if 'Discriminator' in e.name
            ]
            gvars = [
                e for e in self.graph.get_collection('trainable_variables')
                if 'Generator' in e.name
            ]

            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            # Gradients are clipped to a global norm of 5 before applying.
            grad_d, var_d = zip(*self.optimizer.compute_gradients(
                self.dis_loss, var_list=dvars))
            grad_d_clipped, _ = tf.clip_by_global_norm(grad_d, 5.)
            grad_g, var_g = zip(*self.optimizer.compute_gradients(
                self.gen_loss, var_list=gvars))
            grad_g_clipped, _ = tf.clip_by_global_norm(grad_g, 5.)
            self.train_op_dis = self.optimizer.apply_gradients(
                zip(grad_d_clipped, var_d))
            self.train_op_gen = self.optimizer.apply_gradients(
                zip(grad_g_clipped, var_g))

            # Neither train op advances the global step; it is incremented
            # explicitly through this op.
            self.inc = tf.assign_add(
                self.global_step, 1, name='increment')

            # Summary
            tf.summary.scalar('dis_loss_real', self.dis_loss_real)
            tf.summary.scalar('dis_loss_fake', self.dis_loss_fake)
            tf.summary.scalar('dis_loss', self.dis_loss)
            tf.summary.scalar('gen_loss', self.gen_loss)
            # NOTE(review): this summarises the increment op itself, so
            # evaluating `self.merged` appears to advance the global step as
            # a side effect — confirm this is intended.
            tf.summary.scalar('step', self.inc)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the graph; in training mode also loss, logging and train op.

    The encoder consumes `spectro`; the first decoder is trained to
    reproduce `spectro` and the second to produce `magnit`.

    Args:
        is_training: When true, inputs come from `get_batch()` and the loss,
            logging tensors, Adam train op and merged summary are created.
            Otherwise `length` and `spectro` are placeholders and only the
            forward pass is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # (batch_size, ?, 258) (batch_size, ?, 400)(batch_size, ?, 5125)
            # Get data batch
            (self.spectro, self.magnit, self.length,
             self.num_batch) = get_batch()
        else:  # Evaluation
            self.length = tf.placeholder(tf.int32, shape=(None, None))
            self.spectro = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        # This is the decoder's input.
        self.decoder_inputs = shift_by_one(self.spectro)

        with tf.variable_scope("net"):
            #### Encoder
            # (A leftover interactive debugger breakpoint used to sit here;
            # it halted every graph construction.)
            self.memory = net.encode(
                self.spectro, is_training=is_training)  # (N, T, E)

            #### Decoder
            self.outputs1 = net.decode1(
                self.decoder_inputs,
                self.memory,  # encoder RNN output
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = net.decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            #### Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.spectro)
                self.loss2 = tf.abs(self.outputs2 - self.magnit)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(
                    self.outputs1, self.spectro)
                self.loss2 = tf.squared_difference(
                    self.outputs2, self.magnit)

            # Target masking: drop positions whose target is exactly zero.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.spectro, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.magnit, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(
                tf.reduce_mean(self.spectro, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(
                tf.reduce_mean(self.magnit, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first sample only, last axis averaged to one channel
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.spectro[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.magnit[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training Scheme
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            # Tag uses an underscore like its siblings; a space is not a
            # valid summary-name character.
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()