def clean(self, x):
    """ clean a utterance x
        x: numpy array containing the normalized noisy waveform
    """
    c_res = None
    for beg_i in range(0, x.shape[0], self.canvas_size):
        # the last chunk may be shorter than the canvas, so zero-pad it
        if x.shape[0] - beg_i < self.canvas_size:
            length = x.shape[0] - beg_i
            pad = self.canvas_size - length
        else:
            length = self.canvas_size
            pad = 0
        x_ = np.zeros((self.batch_size, self.canvas_size))
        if pad > 0:
            x_[0] = np.concatenate((x[beg_i:beg_i + length],
                                    np.zeros(pad)))
        else:
            x_[0] = x[beg_i:beg_i + length]
        print('Cleaning chunk {} -> {}'.format(beg_i, beg_i + length))
        fdict = {self.gtruth_noisy[0]: x_}
        canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)[0]
        canvas_w = canvas_w.reshape((self.canvas_size,))
        print('canvas w shape: ', canvas_w.shape)
        if pad > 0:
            print('Removing padding of {} samples'.format(pad))
            # get rid of last padded samples
            canvas_w = canvas_w[:-pad]
        if c_res is None:
            c_res = canvas_w
        else:
            c_res = np.concatenate((c_res, canvas_w))
    # deemphasize
    c_res = de_emph(c_res, self.preemph)
    return c_res
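# A minimal usage sketch for clean(), assuming a built SEGAN-style `model`
# object with a live session. `pre_emph` below is the standard first-order
# pre-emphasis filter that de_emph() inverts; 'noisy.wav' and the int16
# normalization are illustrative assumptions, not the repository's exact
# pipeline.
import numpy as np
from scipy.io import wavfile

def pre_emph(x, coeff=0.95):
    # y[n] = x[n] - coeff * x[n - 1], keeping the first sample unchanged
    if coeff <= 0:
        return x
    x0 = np.reshape(x[0], (1,))
    diff = x[1:] - coeff * x[:-1]
    return np.concatenate((x0, diff), axis=0)

fs, wav = wavfile.read('noisy.wav')       # hypothetical input file
wav = wav.astype(np.float32) / 32768.     # scale int16 PCM to [-1, 1]
wav = pre_emph(wav, model.preemph)        # model: assumed SEGAN instance
clean_wav = model.clean(wav)              # returns de-emphasized waveform
wavfile.write('cleaned.wav', fs, clean_wav)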
def clean(self, x):
    """ clean a utterance x
        x: numpy array containing the normalized noisy waveform
    """
    # variant that processes the whole utterance in a single batch;
    # assumes x fits within batch_size * canvas_size samples
    x_ = np.zeros((self.batch_size * self.canvas_size))
    x_[:x.shape[0]] = x
    x_ = x_.reshape((self.batch_size, self.canvas_size))
    # self.x = np.array(self.x)[:, :, np.newaxis]
    fdict = {self.gtruth_noisy[0]: x_}
    canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
    canvas_w = np.array(canvas_w)
    canvas_w = canvas_w.reshape((self.batch_size * self.canvas_size))
    print('canvas w shape: ', canvas_w.shape)
    # drop the zero-padded tail so the output matches the input length
    canvas_w = canvas_w[:x.shape[0]]
    # deemphasize
    c_res = de_emph(canvas_w, self.preemph)
    return c_res
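# Both clean() variants end with de_emph(), which inverts the first-order
# pre-emphasis filter applied before enhancement. A minimal sketch, assuming
# pre-emphasis of the form y[n] = x[n] - coeff * x[n - 1]:
def de_emph(y, coeff=0.95):
    if coeff <= 0:
        return y
    x = np.zeros(y.shape[0], dtype=np.float32)
    x[0] = y[0]
    # running recursion: x[n] = coeff * x[n - 1] + y[n]
    for n in range(1, y.shape[0]):
        x[n] = coeff * x[n - 1] + y[n]
    return x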
import time

def clean_serving(x, canvas_size, batch_size, preemph, predictor=None):
    """ clean a utterance x
        x: numpy array containing the normalized noisy waveform
    """
    c_res = None
    print('start timer')
    time1 = time.time()
    for beg_i in range(0, x.shape[0], canvas_size):
        if x.shape[0] - beg_i < canvas_size:
            length = x.shape[0] - beg_i
            pad = canvas_size - length
        else:
            length = canvas_size
            pad = 0
        x_ = np.zeros((batch_size, canvas_size))
        if pad > 0:
            x_[0] = np.concatenate((x[beg_i:beg_i + length],
                                    np.zeros(pad)))
        else:
            x_[0] = x[beg_i:beg_i + length]
        print('Cleaning chunk {} -> {}'.format(beg_i, beg_i + length))
        # fdict = {self.gtruth_noisy[0]: x_}
        # canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)[0]
        test_example = {'wav_and_noisy': x_.tolist()}
        if predictor is None:
            # no served model attached: pass the chunk through unchanged
            canvas_w = x_[0]
        else:
            canvas_w = predictor.predict(test_example)
        canvas_w = canvas_w.reshape((canvas_size,))
        print('canvas w shape: ', canvas_w.shape)
        if pad > 0:
            print('Removing padding of {} samples'.format(pad))
            # get rid of last padded samples
            canvas_w = canvas_w[:-pad]
        if c_res is None:
            c_res = canvas_w
        else:
            c_res = np.concatenate((c_res, canvas_w))
    # deemphasize
    c_res = de_emph(c_res, preemph)
    print('finish {}'.format(time.time() - time1))
    return c_res
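# A minimal usage sketch for clean_serving(). Any object exposing
# predict(example_dict) -> np.ndarray works as `predictor`; the identity
# stub below is hypothetical and only illustrates the expected interface
# (a real one would wrap a served SEGAN model). de_emph() from the sketch
# above is assumed to be in scope.
class IdentityPredictor(object):
    """Hypothetical stand-in that echoes the noisy chunk back."""
    def predict(self, example):
        return np.array(example['wav_and_noisy'])[0]

canvas_size = 2 ** 14  # assumed chunk length of 16384 samples
noisy = np.random.uniform(-1., 1., 3 * canvas_size).astype(np.float32)
out = clean_serving(noisy, canvas_size, batch_size=1, preemph=0.95,
                    predictor=IdentityPredictor())
print(out.shape)  # same number of samples as the input waveform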
def train(self, config, devices):
    """ Train the SEGAN """
    print('Initializing optimizers...')
    # init optimizers
    d_opt = self.d_opt
    g_opt = self.g_opt
    num_devices = len(devices)
    try:
        init = tf.global_variables_initializer()
    except AttributeError:
        # fall back to old implementation
        init = tf.initialize_all_variables()
    print('Initializing variables...')
    self.sess.run(init)
    g_summs = [self.d_fk_sum,
               # self.d_nfk_sum,
               self.d_fk_loss_sum,
               # self.d_nfk_loss_sum,
               self.g_loss_sum,
               self.g_loss_l1_sum,
               self.g_loss_adv_sum,
               self.gen_summ,
               self.gen_audio_summ]
    # if we have prelus, add them to summary
    if hasattr(self, 'alpha_summ'):
        g_summs += self.alpha_summ
    self.g_sum = tf.summary.merge(g_summs)
    self.d_sum = tf.summary.merge([self.d_loss_sum,
                                   self.d_rl_sum,
                                   self.d_rl_loss_sum,
                                   self.rl_audio_summ,
                                   self.real_w_summ,
                                   self.disc_noise_std_summ])
    if not os.path.exists(os.path.join(config.save_path, 'train')):
        os.makedirs(os.path.join(config.save_path, 'train'))
    self.writer = tf.summary.FileWriter(
        os.path.join(config.save_path, 'train'), self.sess.graph)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print('Sampling some wavs to store sample references...')
    # Hang onto a copy of wavs so we can feed the same one every time
    # we store samples to disk for hearing
    # pick a single batch
    sample_noisy, sample_wav, sample_z = self.sess.run(
        [self.gtruth_noisy[0], self.gtruth_wavs[0], self.zs[0]])
    print('sample noisy shape: ', sample_noisy.shape)
    print('sample wav shape: ', sample_wav.shape)
    print('sample z shape: ', sample_z.shape)
    save_path = config.save_path
    counter = 0
    # count number of samples
    num_examples = 0
    for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
        num_examples += 1
    print('total examples in TFRecords {}: {}'.format(self.e2e_dataset,
                                                      num_examples))
    # last samples (those not filling a complete batch) are discarded
    num_batches = num_examples // self.batch_size
    print('Batches per epoch: ', num_batches)

    if self.load(self.save_path):
        print('[*] Load SUCCESS')
    else:
        print('[!] Load failed')
    batch_idx = 0
    curr_epoch = 0
    batch_timings = []
    d_fk_losses = []
    # d_nfk_losses = []
    d_rl_losses = []
    g_adv_losses = []
    g_l1_losses = []
    try:
        while not coord.should_stop():
            start = timeit.default_timer()
            if counter % config.save_freq == 0:
                # run D and G together with their merged summaries
                for d_iter in range(self.disc_updates):
                    _d_opt, _d_sum, d_fk_loss, d_rl_loss = self.sess.run(
                        [d_opt, self.d_sum,
                         self.d_fk_losses[0],
                         # self.d_nfk_losses[0],
                         self.d_rl_losses[0]])
                    if self.d_clip_weights:
                        self.sess.run(self.d_clip)
                # now G iterations
                _g_opt, _g_sum, g_adv_loss, g_l1_loss = self.sess.run(
                    [g_opt, self.g_sum,
                     self.g_adv_losses[0],
                     self.g_l1_losses[0]])
            else:
                for d_iter in range(self.disc_updates):
                    _d_opt, d_fk_loss, d_rl_loss = self.sess.run(
                        [d_opt,
                         self.d_fk_losses[0],
                         # self.d_nfk_losses[0],
                         self.d_rl_losses[0]])
                    if self.d_clip_weights:
                        self.sess.run(self.d_clip)
                _g_opt, g_adv_loss, g_l1_loss = self.sess.run(
                    [g_opt,
                     self.g_adv_losses[0],
                     self.g_l1_losses[0]])
            end = timeit.default_timer()
            batch_timings.append(end - start)
            d_fk_losses.append(d_fk_loss)
            # d_nfk_losses.append(d_nfk_loss)
            d_rl_losses.append(d_rl_loss)
            g_adv_losses.append(g_adv_loss)
            g_l1_losses.append(g_l1_loss)
            print('{}/{} (epoch {}), d_rl_loss = {:.5f}, '
                  'd_fk_loss = {:.5f}, '  # d_nfk_loss = {:.5f}, '
                  'g_adv_loss = {:.5f}, g_l1_loss = {:.5f},'
                  ' time/batch = {:.5f}, '
                  'mtime/batch = {:.5f}'.format(counter,
                                                config.epoch * num_batches,
                                                curr_epoch,
                                                d_rl_loss,
                                                d_fk_loss,
                                                # d_nfk_loss,
                                                g_adv_loss,
                                                g_l1_loss,
                                                end - start,
                                                np.mean(batch_timings)))
            batch_idx += num_devices
            counter += num_devices
            if (counter // num_devices) % config.save_freq == 0:
                self.save(config.save_path, counter)
                self.writer.add_summary(_g_sum, counter)
                self.writer.add_summary(_d_sum, counter)
                fdict = {self.gtruth_noisy[0]: sample_noisy,
                         self.zs[0]: sample_z}
                canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                swaves = sample_wav
                sample_dif = sample_wav - sample_noisy
                for m in range(min(20, canvas_w.shape[0])):
                    print('w{} max: {} min: {}'.format(m,
                                                       np.max(canvas_w[m]),
                                                       np.min(canvas_w[m])))
                    wavfile.write(os.path.join(save_path,
                                               'sample_{}-{}.wav'.format(
                                                   counter, m)),
                                  int(16e3),
                                  de_emph(canvas_w[m], self.preemph))
                    m_gtruth_path = os.path.join(save_path,
                                                 'gtruth_{}.wav'.format(m))
                    if not os.path.exists(m_gtruth_path):
                        wavfile.write(m_gtruth_path,
                                      int(16e3),
                                      de_emph(swaves[m], self.preemph))
                        wavfile.write(os.path.join(save_path,
                                                   'noisy_{}.wav'.format(m)),
                                      int(16e3),
                                      de_emph(sample_noisy[m], self.preemph))
                        wavfile.write(os.path.join(save_path,
                                                   'dif_{}.wav'.format(m)),
                                      int(16e3),
                                      de_emph(sample_dif[m], self.preemph))
                np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
                           d_rl_losses)
                np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
                           d_fk_losses)
                np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
                           g_adv_losses)
                np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
                           g_l1_losses)
            if batch_idx >= num_batches:
                curr_epoch += 1
                # re-set batch idx
                batch_idx = 0
                # check if we have to deactivate L1
                if curr_epoch >= config.l1_remove_epoch and \
                        not self.deactivated_l1:
                    print('** Deactivating L1 factor! **')
                    self.sess.run(tf.assign(self.l1_lambda, 0.))
                    self.deactivated_l1 = True
                # check if we have to start decaying noise (if any)
                if curr_epoch >= config.denoise_epoch and \
                        not self.deactivated_noise:
                    # apply noise std decay rate
                    decay = config.noise_decay
                    if not hasattr(self, 'curr_noise_std'):
                        self.curr_noise_std = self.init_noise_std
                    new_noise_std = decay * self.curr_noise_std
                    if new_noise_std < config.denoise_lbound:
                        print('New noise std {} < lbound {}, '
                              'setting 0.'.format(new_noise_std,
                                                  config.denoise_lbound))
                        print('** De-activating noise layer **')
                        # if it's lower than the lower bound, cancel it
                        # out completely
                        new_noise_std = 0.
                        self.deactivated_noise = True
                    else:
                        print('Applying decay {} to noise std {}: '
                              '{}'.format(decay, self.curr_noise_std,
                                          new_noise_std))
                    self.sess.run(tf.assign(self.disc_noise_std,
                                            new_noise_std))
                    self.curr_noise_std = new_noise_std
            if curr_epoch >= config.epoch:
                # done training
                print('Done training; epoch limit {} '
                      'reached.'.format(self.epoch))
                print('Saving last model at iteration {}'.format(counter))
                self.save(config.save_path, counter)
                self.writer.add_summary(_g_sum, counter)
                self.writer.add_summary(_d_sum, counter)
                break
    except tf.errors.OutOfRangeError:
        print('Done training; epoch limit {} reached.'.format(self.epoch))
    finally:
        coord.request_stop()
        coord.join(threads)
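# train() only reads attributes off `config`, so any namespace-like object
# works. A minimal sketch of the fields referenced above; the values are
# illustrative assumptions, not the repository's defaults.
from argparse import Namespace

config = Namespace(
    save_path='segan_results',  # checkpoints, summaries and sample wavs
    save_freq=50,               # iterations between checkpoint/sample dumps
    epoch=86,                   # training epoch limit
    l1_remove_epoch=150,        # epoch at which the L1 term is zeroed
    denoise_epoch=5,            # epoch at which disc. noise starts decaying
    noise_decay=0.7,            # multiplicative decay on the noise std
    denoise_lbound=0.001,       # below this std the noise layer is disabled
)
# model.train(config, devices=['/gpu:0'])  # model: assumed SEGAN instance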
test_error = test_error ** 0.5
print('training_error', "{:.9f}".format(training_error))
print('test_error', "{:.9f}".format(test_error))
# print('learning_rate= ', learning_rate)
# print('num_quantization_steps= ', num_steps)

#%%##########################################################################
# Save network outputs
AE_output = {}
# AE_output['y_pred_test'] = y_pred_test
# AE_output['y_true_test'] = y_true_test
if emphasis:
    y_pred_test = de_emph(y_pred_test, input_dim)
    y_true_test = de_emph(y_true_test, input_dim)
AE_output['y_pred_test'] = y_pred_test
AE_output['y_true_test'] = y_true_test
si.savemat("/home/hsadeghi/Dropbox/May/past_codes/rnn_AE_output.mat",
           AE_output)
sess.close()
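# The saved .mat file can be inspected later with scipy; a short sketch
# (the path mirrors the hard-coded one above).
import scipy.io as si

data = si.loadmat("/home/hsadeghi/Dropbox/May/past_codes/rnn_AE_output.mat")
y_pred = data['y_pred_test']
y_true = data['y_true_test']
print(y_pred.shape, y_true.shape)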