def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the auto-encoder G graph (no GAN losses).

    Tower 0 additionally creates the input pipeline, the reference
    generator (which instantiates the G variables) and the per-tower
    containers.  Fix: ``tf.sub`` was removed from the TF API; use
    ``tf.subtract``, as the other model variants in this file already do.
    """
    if gpu_idx == 0:
        # create the nodes to load for input pipeline
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        # NOTE(review): window size is hard-coded to 2**14 samples here,
        # while sibling variants use self.canvas_size -- confirm intended.
        self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                       2**14)
    # load the data to input pipeline (every tower dequeues its own batch)
    wavbatch, \
    noisybatch = tf.train.shuffle_batch([self.get_wav, self.get_noisy],
                                        batch_size=self.batch_size,
                                        num_threads=2,
                                        capacity=1000 + 3 * self.batch_size,
                                        min_after_dequeue=1000,
                                        name='wav_and_noisy')
    if gpu_idx == 0:
        # per-tower containers, created once
        self.Gs = []
        self.zs = []
        self.gtruth_wavs = []
        self.gtruth_noisy = []
    self.gtruth_wavs.append(wavbatch)
    self.gtruth_noisy.append(noisybatch)

    # add channels dimension to manipulate in D and G
    wavbatch = tf.expand_dims(wavbatch, -1)
    noisybatch = tf.expand_dims(noisybatch, -1)

    if gpu_idx == 0:
        # reference pass (is_ref=True) creates the generator variables;
        # the generator returns a sequence, element 0 is the output wave
        self.reference_G = self.generator(noisybatch, is_ref=True,
                                          spk=None, z_on=False)
        self.reference_G = self.reference_G[0]

    G = self.generator(noisybatch, is_ref=False, spk=None, z_on=False)
    print('GAE shape: ', G.get_shape())
    self.Gs.append(G)

    self.rl_audio_summ = audio_summary('real_audio', wavbatch)
    self.real_w_summ = histogram_summary('real_wav', wavbatch)
    self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
    self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
    self.gen_audio_summ = audio_summary('G_audio', G)
    self.gen_summ = histogram_summary('G_wav', G)

    if gpu_idx == 0:
        self.g_losses = []

    # Add the L1 reconstruction loss to G.
    # Fix: tf.sub -> tf.subtract (tf.sub no longer exists).
    g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

    self.g_losses.append(g_loss)
    self.g_loss_sum = scalar_summary("g_loss", g_loss)

    if gpu_idx == 0:
        self.get_vars()
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the auto-encoder G graph (no GAN losses).

    Variant that keeps the generator's full return value in
    ``self.reference_G``.  Fix: ``tf.sub`` was removed from the TF API;
    use ``tf.subtract``, consistent with the other variants in this file.
    """
    if gpu_idx == 0:
        # create the nodes to load for input pipeline
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        # NOTE(review): window size is hard-coded to 2**14 samples here,
        # while sibling variants use self.canvas_size -- confirm intended.
        self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                       2 ** 14)
    # load the data to input pipeline (every tower dequeues its own batch)
    wavbatch, \
    noisybatch = tf.train.shuffle_batch([self.get_wav, self.get_noisy],
                                        batch_size=self.batch_size,
                                        num_threads=2,
                                        capacity=1000 + 3 * self.batch_size,
                                        min_after_dequeue=1000,
                                        name='wav_and_noisy')
    if gpu_idx == 0:
        # per-tower containers, created once
        self.Gs = []
        self.zs = []
        self.gtruth_wavs = []
        self.gtruth_noisy = []
    self.gtruth_wavs.append(wavbatch)
    self.gtruth_noisy.append(noisybatch)

    # add channels dimension to manipulate in D and G
    wavbatch = tf.expand_dims(wavbatch, -1)
    noisybatch = tf.expand_dims(noisybatch, -1)

    if gpu_idx == 0:
        # reference pass (is_ref=True) creates the generator variables
        self.reference_G = self.generator(noisybatch, is_ref=True,
                                          spk=None, z_on=False)

    G = self.generator(noisybatch, is_ref=False, spk=None, z_on=False)
    print('GAE shape: ', G.get_shape())
    self.Gs.append(G)

    self.rl_audio_summ = audio_summary('real_audio', wavbatch)
    self.real_w_summ = histogram_summary('real_wav', wavbatch)
    self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
    self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
    self.gen_audio_summ = audio_summary('G_audio', G)
    self.gen_summ = histogram_summary('G_wav', G)

    if gpu_idx == 0:
        self.g_losses = []

    # Add the L1 reconstruction loss to G.
    # Fix: tf.sub -> tf.subtract (tf.sub no longer exists).
    g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

    self.g_losses.append(g_loss)
    self.g_loss_sum = scalar_summary("g_loss", g_loss)

    if gpu_idx == 0:
        self.get_vars()
def input_fn(dataset_dir='', num_epochs=1, canvas_size=32, preemph=0.1,
             batch_size=32):
    """Queue-based input pipeline in tf.estimator (features, labels) form.

    Args:
        dataset_dir: path to a single TFRecords file.
        num_epochs: epochs for the filename queue.
        canvas_size: samples per decoded window.
        preemph: pre-emphasis coefficient forwarded to read_and_decode.
        batch_size: examples per shuffled batch.

    Returns:
        ({"wav_and_noisy": noisy_batch}, clean_wav_batch)
    """
    filename_queue = tf.train.string_input_producer([dataset_dir],
                                                    num_epochs=num_epochs)
    get_wav, get_noisy = read_and_decode(filename_queue, canvas_size, preemph)
    # load the data to input pipeline
    print(get_wav)
    wavbatch, \
    noisybatch = tf.train.shuffle_batch([get_wav, get_noisy],
                                        batch_size=batch_size,
                                        num_threads=2,
                                        capacity=1000 + 3 * batch_size,
                                        min_after_dequeue=1000,
                                        name='wav_and_noisy')
    print(wavbatch)
    # count the records with a generator expression instead of a manual
    # counter loop (same behavior, idiomatic); note this scans the whole
    # file eagerly every time input_fn is called
    num_examples = sum(1 for _ in tf.python_io.tf_record_iterator(dataset_dir))
    print('!!!!!!!!!!!!!!!!total examples in TFRecords {}: {}'.format(
        dataset_dir, num_examples))
    labels = wavbatch
    return {"wav_and_noisy": noisybatch}, labels
def main(_):
    """Train a 'deep LP' model: an AE generator whose output is linearly
    combined (via LPC-initialized weights) to predict the next sample,
    then periodically evaluate against classic LPC and plot spectra.

    Reads wav windows from FLAGS.e2e_dataset through a TF queue pipeline.
    """
    file_queue = tf.train.string_input_producer([FLAGS.e2e_dataset])
    get_wav = read_and_decode(file_queue, FLAGS.canvas_size)
    wavbatch = tf.train.shuffle_batch([get_wav],
                                      batch_size=FLAGS.batch_size,
                                      num_threads=2,
                                      capacity=1000 + 3 * FLAGS.batch_size,
                                      min_after_dequeue=1000,
                                      name='wav_and_noisy')
    # Hyper-parameters / bookkeeping.
    lambdaG = 100
    lambdaprediction = 1
    savefile = 'DeepLPcoeff.npz'
    learning_rate = 0.00001  # 0.0001
    deltamaxstep = 50
    maxstep = 5000  # 10000
    test_epochs = 100
    training_epochs = 10  # 5000
    display_step = int(maxstep / 10)  # 500
    # NOTE(review): p = 8 is immediately overwritten; dead assignment.
    p = 8
    p = 18  # LP order
    rng = np.random
    # Extend the window by the LP order to fit the Hankel framing below.
    FLAGS.canvas_size = FLAGS.canvas_size + p
    # tf Graph input (only pictures)
    X = tf.placeholder(tf.float32, [FLAGS.canvas_size, p])
    # X0 = tf.placeholder(tf.float32, [FLAGS.canvas_size, p])
    Y = tf.placeholder(tf.float32, [FLAGS.canvas_size, 1])

    class param:
        # Minimal config object for AEGenerator.
        def __init__(self):
            self.g_enc_depths = ''  # name
            self.d_num_fmaps = ''  # size
            self.bias_downconv = False
            self.deconv_type = 'deconv'
            self.bias_deconv = False
            # self.list = []  # list

    aparam = param()  # build the config object
    aparam.g_enc_depths = [
        16
    ]  # , 32]#, 32, 64]#, 64, 128, 128, 256, 256, 512, 1024]
    # Define D fmaps
    # aparam.d_num_fmaps = [16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 1024]
    generator = AEGenerator(aparam)
    G = generator(X, is_ref=False, z_on=False)
    G = tf.squeeze(G)
    # Set model weights
    W = tf.placeholder(tf.float32, [p, 1])  # rng.randn(p,1)
    # b = tf.Variable(rng.randn(1), name="lastbias", dtype=tf.float32) #tf.zeros([p,1])
    # Construct a linear model
    # pred = tf.add(tf.matmul(X, W), b)  # tf.multiply is wrong
    # W0 = tf.Variable(rng.randn(p, 1), name="lastweight0", dtype=tf.float32)  # rng.randn(p,1)
    # y_pred0=tf.matmul(X0,W0)
    # Prediction
    y_pred = tf.matmul(G, W)  # what if i use lpca for w initialization
    # Define loss and optimizer, minimize the squared error
    cost = tf.reduce_mean(tf.pow(Y - y_pred, 2))
    # cost0 = tf.reduce_mean(tf.pow(Y - y_pred0, 2))
    # cost=lambdaG*tf.reduce_mean(tf.pow(G-X,2))+lambdaprediction*cost0
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
    # optimizer0 = tf.train.RMSPropOptimizer(learning_rate0).minimize(cost0)
    # optimizertest = tf.train.RMSPropOptimizer(learning_rate).minimize(cost, var_list=[W])
    # init = tf.global_variables_initializer()
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(tf.global_variables_initializer())
        state = load_trainable_vars(
            sess, savefile)  # must load AFTER the initializer
        # must use this same Session to perform all training
        # if we start a new Session, things would replay and we'd be training with our validation set (no no)
        # done = state.get('done', [])
        log = str(state.get('log', ''))
        step = 1
        training_cost = 0
        # init.run()
        try:
            while not coord.should_stop():
                inputdata = sess.run([wavbatch])
                inputdata = np.squeeze(inputdata)
                # Hankel matrix frames the last p samples per row
                # (zero-padded at the start) -> LP regression design matrix.
                train_X = np.asarray(
                    hankel(np.append(np.zeros(p), inputdata),
                           np.zeros((p, 1))))
                # print(train_X.shape)
                ##print(inputdata)
                train_Y = np.asarray([np.append(inputdata, np.zeros((p, 1)))])
                # Classic LPC coefficients used to feed placeholder W.
                a, _, _ = lpc2(inputdata, p)
                b = -a[1:]
                lpca = np.asarray([b[::-1]]).T
                # print('linear prediction coeff=',lpca)
                train_Y = train_Y.T
                # ++++++++++++++++++++++
                for epoch in range(training_epochs):
                    # for (x, y) in zip(train_X, train_Y):
                    sess.run(optimizer,
                             feed_dict={
                                 X: train_X,
                                 Y: train_Y,
                                 W: lpca
                             })
                    # sess.run(optimizer0, feed_dict={X0: train_X, Y: train_Y})
                    # Display logs per epoch step
                    # if (epoch + 1) % display_step == 0:
                    #     c = sess.run(cost, feed_dict={X: train_X, Y: train_Y, W: lpca})
                    #     print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c))
                    #     c = sess.run(cost0, feed_dict={X0: train_X, Y: train_Y})
                    #     print("Epoch:", '%04d' % (epoch + 1), "cost0=", "{:.9f}".format(c))
                # print("Optimization Finished!")
                training_cost += sess.run(cost,
                                          feed_dict={
                                              X: train_X,
                                              Y: train_Y,
                                              W: lpca
                                          })
                # Running average cost in dB.
                averagecost = 10 * np.log10(training_cost / step)
                if step % display_step == 0:
                    print("step ", step, "Training cost=", averagecost, '\n')
                    # print('W=', sess.run(W),'\n')
                step += 1
                # +++++++++++++++++++++++
                if step >= maxstep:
                    log = log + '\n cost={nmse:.6f} dB in {i} iterations'.format(
                        nmse=averagecost, i=step)
                    state['log'] = log
                    save_trainable_vars(sess, savefile, **state)
                    maxstep = maxstep + deltamaxstep
                    for i in range(test_epochs):
                        # NOTE(review): `noisybatch` is never defined in this
                        # function (only `wavbatch` is batched above) -- this
                        # line raises NameError when reached; confirm intent.
                        inputdata, noisybatch0 = sess.run(
                            [wavbatch, noisybatch])
                        inputdata = np.squeeze(inputdata)
                        xt = inputdata
                        num_sample = len(xt)

                        def nextpow2(x):
                            # Smallest exponent e with 2**e >= x.
                            return np.ceil(np.log2(x))

                        zpf = 3  # zero-padding factor for the FFT grid
                        Nfft = int(2**nextpow2(num_sample * zpf))
                        Org_XW = sp.fft(xt, Nfft)
                        test_X = np.asarray(
                            hankel(np.append(np.zeros(p), inputdata),
                                   np.zeros((p, 1))))
                        test_Y = np.asarray(
                            [np.append(inputdata, np.zeros((p, 1)))])
                        a, _, _ = lpc2(inputdata, p)
                        b = -a[1:]
                        lpca = np.asarray([b[::-1]]).T
                        test_Y = test_Y.T
                        # Least-squares weights on top of the AE features.
                        test_G = sess.run(G, feed_dict={X: test_X})
                        invX = np.linalg.pinv(test_G)
                        myW = np.dot(invX, test_Y)
                        my_est = np.dot(test_G, myW)
                        my_est = my_est[0:-p]
                        plt.figure(1)
                        plt.subplot(221)
                        plt.plot(test_Y[0:-p], label='Original data')
                        plt.plot(test_Y[0:-p] - my_est,
                                 'r',
                                 label='my residue line')
                        plt.plot(test_Y[0:-p] - np.matmul(test_X[0:-p], lpca),
                                 'b--',
                                 label='LP residue line')
                        plt.legend()
                        print(
                            "LPC error is ",
                            np.mean(
                                np.square(test_Y[0:-p] -
                                          np.matmul(test_X[0:-p], lpca))))
                        print("my error is",
                              np.mean(np.square(test_Y[0:-p] - my_est)))
                        plt.subplot(222)
                        plt.plot(lpca, 'r--', label='LP coef')
                        plt.plot(myW, 'b', label='deep LP coef')
                        plt.legend()
                        # Compare the spectral envelopes of both filters.
                        Fs = 16000
                        myDLPcoef = np.append(1, -myW[::-1])
                        w0, Org_h0 = sig.freqz(1, myDLPcoef, Nfft, whole=True)
                        Org_F0 = Fs * w0 / (2 * np.pi)
                        Org_LP_coef = a
                        w, Org_h = sig.freqz(1, Org_LP_coef, Nfft, whole=True)
                        Org_F = Fs * w / (2 * np.pi)
                        Org_mag = abs(Org_XW)
                        Org_mag = 20 * np.log10(Org_mag)
                        f = np.asarray(range(Nfft)).astype(
                            np.float32) * Fs / Nfft
                        plt.subplot(212)
                        plt.plot(f, Org_mag, 'k-', label='signal')
                        plt.plot(Org_F,
                                 20 * np.log10(abs(Org_h)),
                                 'b--',
                                 label='lpc')
                        plt.plot(Org_F0,
                                 20 * np.log10(abs(Org_h0)),
                                 label='mylpc')
                        plt.xlim((0, Fs / 2))
                        plt.legend()
                        filtercoeff = np.append(0, -Org_LP_coef[1:])
                        est_x = sig.lfilter(filtercoeff, 1,
                                            xt)  # Estimated signal
                        e = xt - est_x
                        plt.show()
                        plt.close('all')
        # NOTE(review): this broad handler swallows all errors (printing
        # only), and the IOError clause below is unreachable because
        # IOError is a subclass of Exception -- reorder to fix.
        except Exception as e:
            print(e)
            coord.request_stop()
        # NOTE(review): coord.should_stop() only *queries* the flag;
        # coord.request_stop() was probably intended here.
        except IOError as e:
            coord.should_stop()
        else:
            pass
        finally:
            pass
        coord.request_stop()
        coord.join(threads)
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the SEGAN graph: queue input (tower 0),
    generator, real/fake discriminators, LSGAN + L1 losses, summaries.

    Fix: migrated the removed TF ops to their current names, consistent
    with the multi-depth variant in this file -- ``tf.concat(axis, vals)``
    -> ``tf.concat(vals, axis)`` and ``tf.sub`` -> ``tf.subtract``.
    """
    if gpu_idx == 0:
        # create the nodes to load for input pipeline
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                       self.canvas_size,
                                                       self.preemph)
    # load the data to input pipeline (every tower dequeues its own batch)
    wavbatch, \
    noisybatch = tf.train.shuffle_batch([self.get_wav, self.get_noisy],
                                        batch_size=self.batch_size,
                                        num_threads=2,
                                        capacity=1000 + 3 * self.batch_size,
                                        min_after_dequeue=1000,
                                        name='wav_and_noisy')
    if gpu_idx == 0:
        self.Gs = []
        self.zs = []
        self.gtruth_wavs = []
        self.gtruth_noisy = []
    self.gtruth_wavs.append(wavbatch)
    self.gtruth_noisy.append(noisybatch)

    # add channels dimension to manipulate in D and G
    wavbatch = tf.expand_dims(wavbatch, -1)
    noisybatch = tf.expand_dims(noisybatch, -1)

    # by default leaky relu is used
    do_prelu = False
    if self.g_nl == 'prelu':
        do_prelu = True

    if gpu_idx == 0:
        # reference pass (is_ref=True) creates the generator variables
        ref_Gs = self.generator(noisybatch, is_ref=True,
                                spk=None,
                                do_prelu=do_prelu)
        print('num of G returned: ', len(ref_Gs))
        self.reference_G = ref_Gs[0]
        self.ref_z = ref_Gs[1]
        if do_prelu:
            self.ref_alpha = ref_Gs[2:]
            self.alpha_summ = []
            for m, ref_alpha in enumerate(self.ref_alpha):
                # add a summary per alpha
                self.alpha_summ.append(
                    histogram_summary('alpha_{}'.format(m), ref_alpha))
        # make a dummy copy of discriminator to have variables and then
        # be able to set up the variable reuse for all other devices
        # merge along channels and this would be a real batch
        dummy_joint = tf.concat([wavbatch, noisybatch], 2)
        dummy = discriminator(self, dummy_joint, reuse=False)

    G, z = self.generator(noisybatch, is_ref=False, spk=None,
                          do_prelu=do_prelu)
    self.Gs.append(G)
    self.zs.append(z)

    # merge pairs along the channel dimension for D
    D_rl_joint = tf.concat([wavbatch, noisybatch], 2)
    D_fk_joint = tf.concat([G, noisybatch], 2)
    # build rl discriminator
    d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
    # build fk G discriminator
    d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

    # make disc variables summaries
    self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
    self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)
    self.rl_audio_summ = audio_summary('real_audio', wavbatch)
    self.real_w_summ = histogram_summary('real_wav', wavbatch)
    self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
    self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
    self.gen_audio_summ = audio_summary('G_audio', G)
    self.gen_summ = histogram_summary('G_wav', G)

    if gpu_idx == 0:
        self.g_losses = []
        self.g_l1_losses = []
        self.g_adv_losses = []
        self.d_rl_losses = []
        self.d_fk_losses = []
        self.d_losses = []

    # LSGAN losses: D pushes real logits to 1 and fake to 0; G pushes
    # fake logits to 1.
    d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
    d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
    g_adv_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 1.))
    d_loss = d_rl_loss + d_fk_loss
    # Add the L1 loss to G (fix: tf.sub -> tf.subtract)
    g_l1_loss = self.l1_lambda * tf.reduce_mean(
        tf.abs(tf.subtract(G, wavbatch)))
    g_loss = g_adv_loss + g_l1_loss

    self.g_l1_losses.append(g_l1_loss)
    self.g_adv_losses.append(g_adv_loss)
    self.g_losses.append(g_loss)
    self.d_rl_losses.append(d_rl_loss)
    self.d_fk_losses.append(d_fk_loss)
    self.d_losses.append(d_loss)

    self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
    self.d_fk_loss_sum = scalar_summary("d_fk_loss", d_fk_loss)
    self.g_loss_sum = scalar_summary("g_loss", g_loss)
    self.g_loss_l1_sum = scalar_summary("g_l1_loss", g_l1_loss)
    self.g_loss_adv_sum = scalar_summary("g_adv_loss", g_adv_loss)
    self.d_loss_sum = scalar_summary("d_loss", d_loss)

    if gpu_idx == 0:
        self.get_vars()
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the multi-depth SEGAN graph.

    Tower 0 creates the input pipeline, the reference generator and the
    per-depth containers; every tower builds its own generator outputs
    (one per depth level), discriminators, and LSGAN + weighted L1 losses.
    """
    if gpu_idx == 0:
        # input pipeline nodes are created once, on the first tower
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                       self.canvas_size,
                                                       self.preemph)
    # every tower dequeues its own shuffled batch
    wavbatch, noisybatch = tf.train.shuffle_batch(
        [self.get_wav, self.get_noisy],
        batch_size=self.batch_size,
        num_threads=2,
        capacity=1000 + 3 * self.batch_size,
        min_after_dequeue=1000,
        name='wav_and_noisy')

    if gpu_idx == 0:
        # one sub-list per depth level for G outputs and latents
        self.Gs = [[] for _ in range(self.depth)]
        self.zs = [[] for _ in range(self.depth)]
        self.gtruth_wavs = []
        self.gtruth_noisy = []
    self.gtruth_wavs.append(wavbatch)
    self.gtruth_noisy.append(noisybatch)

    # give both batches an explicit channel axis for D and G
    wavbatch = tf.expand_dims(wavbatch, -1)
    noisybatch = tf.expand_dims(noisybatch, -1)

    # leaky relu unless prelu was requested
    do_prelu = self.g_nl == 'prelu'

    if gpu_idx == 0:
        ref_Gs = self.generator(noisybatch, is_ref=True, spk=None,
                                do_prelu=do_prelu)
        print('num of G returned: ', len(ref_Gs))
        self.reference_G = ref_Gs[0]  # generated wave
        self.ref_z = ref_Gs[1]  # latent code
        if do_prelu:
            self.ref_alpha = ref_Gs[2]
            # one histogram summary per prelu alpha, per depth level
            self.alpha_summ = [[
                histogram_summary('alpha_{}_{}'.format(lvl, m), alpha)
                for m, alpha in enumerate(self.ref_alpha[lvl])
            ] for lvl in range(self.depth)]
        # dummy D call instantiates the discriminator variables so every
        # later call can reuse them; channel-concat makes a "real" pair
        dummy_joint = tf.concat([wavbatch, noisybatch], 2)
        dummy = discriminator(self, dummy_joint, reuse=False)

    Gs, zs = self.generator(noisybatch, is_ref=False, spk=None,
                            do_prelu=do_prelu)
    for lvl in range(self.depth):
        self.Gs[lvl].append(Gs[lvl])
        self.zs[lvl].append(zs[lvl])

    # channel-concatenated pairs: one real, one fake per depth level
    D_rl_joint = tf.concat([wavbatch, noisybatch], 2)  # real
    D_fk_joint = [tf.concat([Gs[lvl], noisybatch], 2)
                  for lvl in range(self.depth)]
    d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
    d_fk_logits = [discriminator(self, joint, reuse=True)
                   for joint in D_fk_joint]

    if gpu_idx == 0:
        self.g_losses = []
        self.g_l1_losses = [[] for _ in range(self.depth)]
        self.g_adv_losses = []
        self.d_rl_losses = []
        self.d_fk_losses = [[] for _ in range(self.depth)]
        self.d_losses = []

    # LSGAN objectives: D drives real logits to 1 / fake to 0, G drives
    # fake logits to 1; per-depth terms are averaged over self.depth
    d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
    d_fk_loss = [tf.reduce_mean(tf.squared_difference(logits, 0.))
                 for logits in d_fk_logits]
    g_adv_loss = sum(
        (tf.reduce_mean(tf.squared_difference(logits, 1.))
         for logits in d_fk_logits), 0.)
    g_adv_loss /= self.depth
    d_loss = sum((fk / self.depth for fk in d_fk_loss), d_rl_loss)

    # weighted L1 reconstruction term per depth level
    g_l1_loss = [
        self.l1_lambda * self.weights[lvl] *
        tf.reduce_mean(tf.abs(tf.subtract(Gs[lvl], wavbatch)))
        for lvl in range(self.depth)
    ]
    g_loss = sum(g_l1_loss, g_adv_loss)

    for lvl in range(self.depth):
        self.g_l1_losses[lvl].append(g_l1_loss[lvl])
    self.g_adv_losses.append(g_adv_loss)
    self.g_losses.append(g_loss)
    self.d_rl_losses.append(d_rl_loss)
    for lvl in range(self.depth):
        self.d_fk_losses[lvl].append(d_fk_loss[lvl])
    self.d_losses.append(d_loss)

    if gpu_idx == 0:
        self.get_vars()
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of a discriminator-only graph over (nn, ref)
    pairs: queue input (tower 0), real/fake D logits with a tf.Print
    probe, and LSGAN D losses + summaries.  No generator is built here.
    """
    if gpu_idx == 0:
        # create the nodes to load for input pipeline
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        self.get_nn,\
        self.get_ref = read_and_decode(filename_queue,
                                       self.canvas_size,
                                       self.preemph)
    # load the data to input pipeline (every tower dequeues its own batch)
    nnbatch, refbatch\
        = tf.train.shuffle_batch([self.get_nn, self.get_ref],
                                 batch_size=self.batch_size,
                                 num_threads=2,
                                 capacity=1000 + 3 * self.batch_size,
                                 min_after_dequeue=1000,
                                 name='nn_and_ref')
    if gpu_idx == 0:
        self.gtruth_nn = []
        self.gtruth_ref = []
    self.gtruth_nn.append(nnbatch)
    self.gtruth_ref.append(refbatch)

    # add channels dimension to manipulate in D and G
    nnbatch = tf.expand_dims(nnbatch, -1)
    refbatch = tf.expand_dims(refbatch, -1)

    # by default leaky relu is used
    do_prelu = False
    if self.g_nl == 'prelu':
        do_prelu = True

    if gpu_idx == 0:
        #self.sample_wavs = tf.placeholder(tf.float32, [self.batch_size,
        #                                               self.canvas_size],
        #                                  name='sample_wavs')
        # make a dummy copy of discriminator to have variables and then
        # be able to set up the variable reuse for all other devices
        # merge along channels and this would be a real batch
        dummy_joint = tf.concat([nnbatch, refbatch], 2)
        dummy = discriminator(self, dummy_joint, reuse=False)

    # add new dimension to merge with other pairs
    D_rl_joint = tf.concat([nnbatch, refbatch], 2)
    # NOTE(review): `G` is never defined in this function (no generator
    # call survives in this variant) -- this line raises NameError.
    # Presumably a generator output was meant here; confirm against the
    # sibling variants before fixing.
    D_fk_joint = tf.concat([nnbatch, G], 2)
    # build rl discriminator
    d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
    # build fk G discriminator
    d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

    # runtime probe: prints mean logits and the fraction classified
    # correctly (real > 0.5, fake < 0.5) whenever d_rl_logits is evaluated
    d_rl_logits = tf.Print(d_rl_logits, [
        tf.reduce_mean(d_rl_logits),
        tf.reduce_mean(tf.cast(tf.greater(d_rl_logits, 0.5), tf.float32)),
        tf.reduce_mean(d_fk_logits),
        tf.reduce_mean(tf.cast(tf.less(d_fk_logits, 0.5), tf.float32))
    ], 'D_rl/D_fk (avg,#ratio correct) = ')
    self.d_rl_logits = d_rl_logits
    self.d_fk_logits = d_fk_logits

    # make disc variables summaries
    self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
    self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)
    #self.d_nfk_sum = histogram_summary("d_noisyfake", d_nfk_logits)
    self.noisy_audio_summ = audio_summary('nn_audio', nnbatch)
    self.noisy_w_summ = histogram_summary('nn_wav', nnbatch)

    if gpu_idx == 0:
        self.d_rl_losses = []
        self.d_fk_losses = []
        #self.d_nfk_losses = []
        self.d_losses = []

    # LSGAN D losses: real logits toward 1, fake toward 0
    d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
    d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
    d_loss = d_rl_loss + d_fk_loss

    self.d_rl_losses.append(d_rl_loss)
    self.d_fk_losses.append(d_fk_loss)
    #self.d_nfk_losses.append(d_nfk_loss)
    self.d_losses.append(d_loss)

    self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
    self.d_fk_loss_sum = scalar_summary("d_fk_loss", d_fk_loss)
    self.d_loss_sum = scalar_summary("d_loss", d_loss)
    #self.d_nfk_loss_sum = scalar_summary("d_nfk_loss",
    #                                     d_nfk_loss)

    if gpu_idx == 0:
        self.get_vars()
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the SEGAN graph: queue input (tower 0),
    generator, real/fake discriminators, LSGAN + L1 losses, summaries.

    NOTE(review): this variant uses the pre-1.0 TF API forms
    ``tf.concat(axis, values)`` and ``tf.sub``; left untouched since the
    whole block consistently targets that API vintage.
    """
    if gpu_idx == 0:
        # create the nodes to load for input pipeline
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                       self.canvas_size,
                                                       self.preemph)
    # load the data to input pipeline (every tower dequeues its own batch)
    wavbatch, \
    noisybatch = tf.train.shuffle_batch([self.get_wav, self.get_noisy],
                                        batch_size=self.batch_size,
                                        num_threads=2,
                                        capacity=1000 + 3 * self.batch_size,
                                        min_after_dequeue=1000,
                                        name='wav_and_noisy')
    if gpu_idx == 0:
        # per-tower containers, created once
        self.Gs = []
        self.zs = []
        self.gtruth_wavs = []
        self.gtruth_noisy = []
    self.gtruth_wavs.append(wavbatch)
    self.gtruth_noisy.append(noisybatch)

    # add channels dimension to manipulate in D and G
    wavbatch = tf.expand_dims(wavbatch, -1)
    noisybatch = tf.expand_dims(noisybatch, -1)

    # by default leaky relu is used
    do_prelu = False
    if self.g_nl == 'prelu':
        do_prelu = True

    if gpu_idx == 0:
        #self.sample_wavs = tf.placeholder(tf.float32, [self.batch_size,
        #                                               self.canvas_size],
        #                                  name='sample_wavs')
        # reference pass (is_ref=True) creates the generator variables
        ref_Gs = self.generator(noisybatch, is_ref=True,
                                spk=None,
                                do_prelu=do_prelu)
        print('num of G returned: ', len(ref_Gs))
        self.reference_G = ref_Gs[0]
        self.ref_z = ref_Gs[1]
        if do_prelu:
            self.ref_alpha = ref_Gs[2:]
            self.alpha_summ = []
            for m, ref_alpha in enumerate(self.ref_alpha):
                # add a summary per alpha
                self.alpha_summ.append(histogram_summary('alpha_{}'.format(m),
                                                         ref_alpha))
        # make a dummy copy of discriminator to have variables and then
        # be able to set up the variable reuse for all other devices
        # merge along channels and this would be a real batch
        dummy_joint = tf.concat(2, [wavbatch, noisybatch])
        dummy = discriminator(self, dummy_joint, reuse=False)

    G, z = self.generator(noisybatch, is_ref=False, spk=None,
                          do_prelu=do_prelu)
    self.Gs.append(G)
    self.zs.append(z)

    # add new dimension to merge with other pairs
    D_rl_joint = tf.concat(2, [wavbatch, noisybatch])
    D_fk_joint = tf.concat(2, [G, noisybatch])
    # build rl discriminator
    d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
    # build fk G discriminator
    d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

    # make disc variables summaries
    self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
    self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)
    #self.d_nfk_sum = histogram_summary("d_noisyfake", d_nfk_logits)
    self.rl_audio_summ = audio_summary('real_audio', wavbatch)
    self.real_w_summ = histogram_summary('real_wav', wavbatch)
    self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
    self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
    self.gen_audio_summ = audio_summary('G_audio', G)
    self.gen_summ = histogram_summary('G_wav', G)

    if gpu_idx == 0:
        self.g_losses = []
        self.g_l1_losses = []
        self.g_adv_losses = []
        self.d_rl_losses = []
        self.d_fk_losses = []
        #self.d_nfk_losses = []
        self.d_losses = []

    # LSGAN losses: D pushes real logits to 1 and fake to 0; G pushes
    # fake logits to 1
    d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
    d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
    #d_nfk_loss = tf.reduce_mean(tf.squared_difference(d_nfk_logits, 0.))
    g_adv_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 1.))
    d_loss = d_rl_loss + d_fk_loss
    # Add the L1 loss to G
    g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.sub(G,
                                                              wavbatch)))
    g_loss = g_adv_loss + g_l1_loss

    self.g_l1_losses.append(g_l1_loss)
    self.g_adv_losses.append(g_adv_loss)
    self.g_losses.append(g_loss)
    self.d_rl_losses.append(d_rl_loss)
    self.d_fk_losses.append(d_fk_loss)
    #self.d_nfk_losses.append(d_nfk_loss)
    self.d_losses.append(d_loss)

    self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
    self.d_fk_loss_sum = scalar_summary("d_fk_loss", d_fk_loss)
    #self.d_nfk_loss_sum = scalar_summary("d_nfk_loss",
    #                                     d_nfk_loss)
    self.g_loss_sum = scalar_summary("g_loss", g_loss)
    self.g_loss_l1_sum = scalar_summary("g_l1_loss", g_l1_loss)
    self.g_loss_adv_sum = scalar_summary("g_adv_loss", g_adv_loss)
    self.d_loss_sum = scalar_summary("d_loss", d_loss)

    if gpu_idx == 0:
        self.get_vars()
def build_model_single_gpu(self, gpu_idx):
    """Build one GPU tower of the seizure-data GAN graph.

    The first tower sets up the TFRecords queue pipeline, the reference
    generator (which creates the G variables) and the shared containers;
    every tower then builds its generator/discriminator pair and the
    LSGAN + L1 losses over (seizure, non-seizure) batches.
    """
    is_first_tower = gpu_idx == 0

    if is_first_tower:
        # queue-based reader over the single e2e TFRecords file
        filename_queue = tf.train.string_input_producer([self.e2e_dataset])
        self.get_seiz, self.get_nonseiz = read_and_decode(
            filename_queue, self.canvas_size)

    # each tower dequeues its own shuffled (seiz, nonseiz) batch
    seiz_batch, nonseiz_batch = tf.train.shuffle_batch(
        [self.get_seiz, self.get_nonseiz],
        batch_size=self.batch_size,
        num_threads=2,
        capacity=1000 + 3 * self.batch_size,
        min_after_dequeue=1000,
        name='seiz_and_nonseiz')

    if is_first_tower:
        self.Gs = []
        self.zs = []
        self.gtruth_seiz = []
        self.gtruth_nonseiz = []
    self.gtruth_seiz.append(seiz_batch)
    self.gtruth_nonseiz.append(nonseiz_batch)

    # channel axis is required by both D and G
    seiz_batch = tf.expand_dims(seiz_batch, -1)
    nonseiz_batch = tf.expand_dims(nonseiz_batch, -1)

    if is_first_tower:
        # reference pass instantiates the generator variables
        ref_out = self.generator(nonseiz_batch, is_ref=True)
        print('num of G returned: ', len(ref_out))
        self.reference_G = ref_out[0]
        self.ref_z = ref_out[1]
        # one throwaway D call creates the discriminator variables so
        # all later calls can run with reuse=True
        var_seed_joint = tf.concat(2, [seiz_batch, nonseiz_batch])
        dummy = discriminator(self, var_seed_joint, reuse=False)

    # per-tower generator output and latent code
    gen_wave, gen_z = self.generator(nonseiz_batch, is_ref=False)
    self.Gs.append(gen_wave)
    self.zs.append(gen_z)

    # channel-concatenated pairs fed to D: real vs generated
    joint_real = tf.concat(2, [seiz_batch, nonseiz_batch])
    joint_fake = tf.concat(2, [gen_wave, nonseiz_batch])
    logits_real = discriminator(self, joint_real, reuse=True)
    logits_fake = discriminator(self, joint_fake, reuse=True)

    if is_first_tower:
        self.g_losses = []
        self.g_l1_losses = []
        self.g_adv_losses = []
        self.d_rl_losses = []
        self.d_fk_losses = []
        self.d_losses = []

    ### Discriminator loss ###
    # LSGAN: real logits toward 1, fake logits toward 0
    loss_d_real = tf.reduce_mean(tf.squared_difference(logits_real, 1.))
    loss_d_fake = tf.reduce_mean(tf.squared_difference(logits_fake, 0.))
    loss_g_adv = tf.reduce_mean(tf.squared_difference(logits_fake, 1.))
    loss_d = loss_d_real + loss_d_fake

    ### Generator loss ###
    # adversarial term plus weighted L1 reconstruction toward seiz_batch
    loss_g_l1 = self.l1_lambda * tf.reduce_mean(
        tf.abs(tf.sub(gen_wave, seiz_batch)))
    loss_g = loss_g_adv + loss_g_l1

    self.g_l1_losses.append(loss_g_l1)
    self.g_adv_losses.append(loss_g_adv)
    self.g_losses.append(loss_g)
    self.d_rl_losses.append(loss_d_real)
    self.d_fk_losses.append(loss_d_fake)
    self.d_losses.append(loss_d)

    if is_first_tower:
        self.get_vars()