Example #1
0
 def clean(self, x):
     """Denoise a single utterance with the generator network.

     The waveform is processed in canvas_size chunks; the last chunk is
     zero-padded up to canvas_size and the padding is trimmed from the
     generator output, so the result has the same length as the input.

     Args:
         x: 1-D numpy array containing the normalized noisy waveform.

     Returns:
         1-D numpy array with the cleaned, de-emphasized waveform.
     """
     c_res = None
     for beg_i in range(0, x.shape[0], self.canvas_size):
         # The final chunk may be short; compute how much padding it needs.
         if x.shape[0] - beg_i < self.canvas_size:
             length = x.shape[0] - beg_i
             pad = self.canvas_size - length
         else:
             length = self.canvas_size
             pad = 0
         # Only batch slot 0 carries data; the remaining slots stay zero.
         x_ = np.zeros((self.batch_size, self.canvas_size))
         if pad > 0:
             x_[0] = np.concatenate(
                 (x[beg_i:beg_i + length], np.zeros(pad)))
         else:
             x_[0] = x[beg_i:beg_i + length]
         print('Cleaning chunk {} -> {}'.format(beg_i, beg_i + length))
         fdict = {self.gtruth_noisy[0]: x_}
         canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)[0]
         canvas_w = canvas_w.reshape((self.canvas_size))
         print('canvas w shape: ', canvas_w.shape)
         if pad > 0:
             print('Removing padding of {} samples'.format(pad))
             # get rid of last padded samples
             canvas_w = canvas_w[:-pad]
         if c_res is None:
             c_res = canvas_w
         else:
             c_res = np.concatenate((c_res, canvas_w))
     # Undo the pre-emphasis filter applied at training time.
     c_res = de_emph(c_res, self.preemph)
     return c_res
Example #2
0
 def clean(self, x):
     """Run the generator over ``x`` chunk by chunk and return the
     de-emphasized cleaned waveform, same length as the input.

     x: numpy array containing the normalized noisy waveform.
     """
     cleaned = None
     total = x.shape[0]
     for start in range(0, total, self.canvas_size):
         # Last chunk may be shorter than the canvas; zero-pad it.
         chunk_len = min(total - start, self.canvas_size)
         n_pad = self.canvas_size - chunk_len
         # Only batch slot 0 carries data; the other slots stay zero.
         batch = np.zeros((self.batch_size, self.canvas_size))
         segment = x[start:start + chunk_len]
         if n_pad > 0:
             segment = np.concatenate((segment, np.zeros(n_pad)))
         batch[0] = segment
         print('Cleaning chunk {} -> {}'.format(start, start + chunk_len))
         feed = {self.gtruth_noisy[0]: batch}
         out = self.sess.run(self.Gs[0], feed_dict=feed)[0]
         out = out.reshape((self.canvas_size))
         print('canvas w shape: ', out.shape)
         if n_pad > 0:
             print('Removing padding of {} samples'.format(n_pad))
             # drop the samples that correspond to the zero padding
             out = out[:-n_pad]
         cleaned = out if cleaned is None else np.concatenate((cleaned, out))
     # deemphasize
     return de_emph(cleaned, self.preemph)
Example #3
0
 def clean(self, x):
     """Clean ``x`` in a single batched forward pass.

     The waveform is zero-padded up to batch_size * canvas_size samples,
     folded into a (batch_size, canvas_size) matrix, denoised by the
     generator, flattened, and trimmed back to the input length.

     x: numpy array containing the normalized noisy waveform.
     """
     n_samples = x.shape[0]
     flat = np.zeros((self.batch_size * self.canvas_size))
     flat[:n_samples] = x
     batch = flat.reshape((self.batch_size, self.canvas_size))
     feed = {self.gtruth_noisy[0]: batch}
     generated = np.array(self.sess.run(self.Gs[0], feed_dict=feed))
     generated = generated.reshape((self.batch_size * self.canvas_size))
     print('canvas w shape: ', generated.shape)
     # Drop the zero-padding tail, then undo pre-emphasis.
     return de_emph(generated[:n_samples], self.preemph)
Example #4
0
def clean_serving(x, canvas_size, batch_size, preemph, predictor=None):
    """Clean an utterance through a serving predictor, chunk by chunk.

    Args:
        x: 1-D numpy array containing the normalized noisy waveform.
        canvas_size: number of samples the model processes per chunk.
        batch_size: batch dimension expected by the model input.
        preemph: pre-emphasis coefficient to undo on the output.
        predictor: serving predictor exposing ``predict(example)``; when
            None, the (noisy) chunk is passed through unchanged.

    Returns:
        1-D numpy array with the cleaned, de-emphasized waveform, same
        number of samples as ``x``.
    """
    c_res = None
    print('start timer')
    start_time = time.time()
    for beg_i in range(0, x.shape[0], canvas_size):
        # The final chunk may be short; zero-pad it to canvas_size.
        if x.shape[0] - beg_i < canvas_size:
            length = x.shape[0] - beg_i
            pad = canvas_size - length
        else:
            length = canvas_size
            pad = 0
        # Only batch slot 0 carries data; the other slots stay zero.
        x_ = np.zeros((batch_size, canvas_size))
        if pad > 0:
            x_[0] = np.concatenate((x[beg_i:beg_i + length], np.zeros(pad)))
        else:
            x_[0] = x[beg_i:beg_i + length]
        print('Cleaning chunk {} -> {}'.format(beg_i, beg_i + length))
        test_example = {'wav_and_noisy': x_.tolist()}
        if predictor is None:
            # No serving endpoint available: identity pass-through.
            canvas_w = x_[0]
        else:
            canvas_w = predictor.predict(test_example)
        canvas_w = canvas_w.reshape((canvas_size))
        print('canvas w shape: ', canvas_w.shape)
        if pad > 0:
            print('Removing padding of {} samples'.format(pad))
            # get rid of last padded samples
            canvas_w = canvas_w[:-pad]
        if c_res is None:
            c_res = canvas_w
        else:
            c_res = np.concatenate((c_res, canvas_w))
    # deemphasize
    c_res = de_emph(c_res, preemph)
    print('finish {}'.format(time.time() - start_time))
    return c_res
Example #5
0
    def train(self, config, devices):
        """ Train the SEGAN.

        config: training configuration (epoch, save_path, save_freq,
            l1_remove_epoch, denoise_epoch, noise_decay, denoise_lbound).
        devices: list of devices the towers were built on; only its
            length is used here, to step the batch/step counters.
        """

        print('Initializing optimizers...')
        # init optimizers
        d_opt = self.d_opt
        g_opt = self.g_opt
        num_devices = len(devices)

        try:
            init = tf.global_variables_initializer()
        except AttributeError:
            # fall back to old implementation
            init = tf.initialize_all_variables()

        print('Initializing variables...')
        self.sess.run(init)
        # Generator-side summaries, merged into a single op below.
        g_summs = [
            self.d_fk_sum,
            #self.d_nfk_sum,
            self.d_fk_loss_sum,
            #self.d_nfk_loss_sum,
            self.g_loss_sum,
            self.g_loss_l1_sum,
            self.g_loss_adv_sum,
            self.gen_summ,
            self.gen_audio_summ
        ]
        # if we have prelus, add them to summary
        if hasattr(self, 'alpha_summ'):
            g_summs += self.alpha_summ
        self.g_sum = tf.summary.merge(g_summs)
        self.d_sum = tf.summary.merge([
            self.d_loss_sum, self.d_rl_sum, self.d_rl_loss_sum,
            self.rl_audio_summ, self.real_w_summ, self.disc_noise_std_summ
        ])

        if not os.path.exists(os.path.join(config.save_path, 'train')):
            os.makedirs(os.path.join(config.save_path, 'train'))

        self.writer = tf.summary.FileWriter(
            os.path.join(config.save_path, 'train'), self.sess.graph)

        # Input pipeline uses TF1 queue runners; the coordinator stops them.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print('Sampling some wavs to store sample references...')
        # Hang onto a copy of wavs so we can feed the same one every time
        # we store samples to disk for hearing
        # pick a single batch
        sample_noisy, sample_wav, \
        sample_z = self.sess.run([self.gtruth_noisy[0],
                                  self.gtruth_wavs[0],
                                  self.zs[0]])
        print('sample noisy shape: ', sample_noisy.shape)
        print('sample wav shape: ', sample_wav.shape)
        print('sample z shape: ', sample_z.shape)

        save_path = config.save_path
        counter = 0
        # count number of samples
        num_examples = 0
        for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
            num_examples += 1
        print('total examples in TFRecords {}: {}'.format(
            self.e2e_dataset, num_examples))
        # last samples (those not filling a complete batch) are discarded
        # NOTE(review): '/' is true division on Python 3, so num_batches is
        # a float there; the >= comparison below still works — confirm the
        # intended interpreter (// would make the comment above accurate).
        num_batches = num_examples / self.batch_size

        print('Batches per epoch: ', num_batches)

        if self.load(self.save_path):
            print('[*] Load SUCCESS')
        else:
            print('[!] Load failed')
        batch_idx = 0
        curr_epoch = 0
        batch_timings = []
        d_fk_losses = []
        #d_nfk_losses = []
        d_rl_losses = []
        g_adv_losses = []
        g_l1_losses = []
        try:
            while not coord.should_stop():
                start = timeit.default_timer()
                # On save iterations, also fetch the merged summary ops.
                if counter % config.save_freq == 0:
                    for d_iter in range(self.disc_updates):
                        _d_opt, _d_sum, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt, self.d_sum,
                                                   self.d_fk_losses[0],
                                                   #self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                        if self.d_clip_weights:
                            # WGAN-style weight clipping after each D update.
                            self.sess.run(self.d_clip)
                        #d_nfk_loss, \

                    # now G iterations
                    _g_opt, _g_sum, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt, self.g_sum,
                                               self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
                else:
                    for d_iter in range(self.disc_updates):
                        _d_opt, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt,
                                                   self.d_fk_losses[0],
                                                   #self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                        #d_nfk_loss, \
                        if self.d_clip_weights:
                            self.sess.run(self.d_clip)

                    _g_opt, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt, self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
                end = timeit.default_timer()
                batch_timings.append(end - start)
                d_fk_losses.append(d_fk_loss)
                #d_nfk_losses.append(d_nfk_loss)
                d_rl_losses.append(d_rl_loss)
                g_adv_losses.append(g_adv_loss)
                g_l1_losses.append(g_l1_loss)
                print('{}/{} (epoch {}), d_rl_loss = {:.5f}, '
                      'd_fk_loss = {:.5f}, '  #d_nfk_loss = {:.5f}, '
                      'g_adv_loss = {:.5f}, g_l1_loss = {:.5f},'
                      ' time/batch = {:.5f}, '
                      'mtime/batch = {:.5f}'.format(
                          counter,
                          config.epoch * num_batches,
                          curr_epoch,
                          d_rl_loss,
                          d_fk_loss,
                          #d_nfk_loss,
                          g_adv_loss,
                          g_l1_loss,
                          end - start,
                          np.mean(batch_timings)))
                batch_idx += num_devices
                counter += num_devices
                # NOTE(review): '/' is float division on Python 3; the
                # modulo test relies on counter being a multiple of
                # num_devices — confirm, or use '//'.
                if (counter / num_devices) % config.save_freq == 0:
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    # Re-feed the reference batch captured above so the
                    # stored samples are comparable across checkpoints.
                    fdict = {
                        self.gtruth_noisy[0]: sample_noisy,
                        self.zs[0]: sample_z
                    }
                    canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                    swaves = sample_wav
                    sample_dif = sample_wav - sample_noisy
                    for m in range(min(20, canvas_w.shape[0])):
                        print('w{} max: {} min: {}'.format(
                            m, np.max(canvas_w[m]), np.min(canvas_w[m])))
                        # NOTE(review): 16e3 is a float sample rate; scipy's
                        # wavfile.write expects an int — confirm it works on
                        # the pinned scipy version.
                        wavfile.write(
                            os.path.join(
                                save_path, 'sample_{}-'
                                '{}.wav'.format(counter, m)), 16e3,
                            de_emph(canvas_w[m], self.preemph))
                        m_gtruth_path = os.path.join(
                            save_path, 'gtruth_{}.'
                            'wav'.format(m))
                        # Ground-truth/noisy/diff references only need to be
                        # written once; skip if they already exist.
                        if not os.path.exists(m_gtruth_path):
                            wavfile.write(
                                os.path.join(save_path, 'gtruth_{}.'
                                             'wav'.format(m)), 16e3,
                                de_emph(swaves[m], self.preemph))
                            wavfile.write(
                                os.path.join(save_path, 'noisy_{}.'
                                             'wav'.format(m)), 16e3,
                                de_emph(sample_noisy[m], self.preemph))
                            wavfile.write(
                                os.path.join(save_path,
                                             'dif_{}.wav'.format(m)), 16e3,
                                de_emph(sample_dif[m], self.preemph))
                        # NOTE(review): these savetxt calls rewrite the same
                        # files once per sample m; hoisting them out of the
                        # loop would do the work once.
                        np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
                                   d_rl_losses)
                        np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
                                   d_fk_losses)
                        np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
                                   g_adv_losses)
                        np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
                                   g_l1_losses)

                if batch_idx >= num_batches:
                    curr_epoch += 1
                    # re-set batch idx
                    batch_idx = 0
                    # check if we have to deactivate L1
                    if curr_epoch >= config.l1_remove_epoch and self.deactivated_l1 == False:
                        print('** Deactivating L1 factor! **')
                        self.sess.run(tf.assign(self.l1_lambda, 0.))
                        self.deactivated_l1 = True
                    # check if we have to start decaying noise (if any)
                    if curr_epoch >= config.denoise_epoch and self.deactivated_noise == False:
                        # apply noise std decay rate
                        decay = config.noise_decay
                        if not hasattr(self, 'curr_noise_std'):
                            self.curr_noise_std = self.init_noise_std
                        new_noise_std = decay * self.curr_noise_std
                        if new_noise_std < config.denoise_lbound:
                            print('New noise std {} < lbound {}, setting 0.'.
                                  format(new_noise_std, config.denoise_lbound))
                            print('** De-activating noise layer **')
                            # if it's lower than the lower bound, cancel noise completely
                            new_noise_std = 0.
                            self.deactivated_noise = True
                        else:
                            print(
                                'Applying decay {} to noise std {}: {}'.format(
                                    decay, self.curr_noise_std, new_noise_std))
                        self.sess.run(
                            tf.assign(self.disc_noise_std, new_noise_std))
                        self.curr_noise_std = new_noise_std
                if curr_epoch >= config.epoch:
                    # done training
                    # NOTE(review): the loop condition uses config.epoch but
                    # the message prints self.epoch — confirm they match.
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    break
        except tf.errors.OutOfRangeError:
            print('Done training; epoch limit {} reached.'.format(self.epoch))
        finally:
            # Always stop the queue-runner threads, even on error.
            coord.request_stop()
        coord.join(threads)
Example #6
0
# RMSE from the accumulated squared test error (computed earlier in the file).
test_error = test_error ** 0.5


print( 'training_error', "{:.9f}".format(training_error))
print( 'test_error', "{:.9f}".format(test_error))

#print('learning_rate= ', learning_rate)
#print('num_quantization_steps= ', num_steps)
#%%##########################################################################
# Saving network outputs

AE_output={};


#AE_output['y_pred_test']=y_pred_test
#AE_output['y_true_test']=y_true_test

# NOTE(review): de_emph is called here with input_dim as its second
# argument, while the other de_emph call sites in this codebase pass a
# pre-emphasis coefficient — confirm this is intentional.
if emphasis:
    y_pred_test = de_emph(y_pred_test, input_dim)
    y_true_test = de_emph(y_true_test, input_dim)

AE_output['y_pred_test']=y_pred_test
AE_output['y_true_test']=y_true_test


# Persist predictions and ground truth for offline analysis in MATLAB.
si.savemat("/home/hsadeghi/Dropbox/May/past_codes/rnn_AE_output.mat",
           AE_output);

sess.close()
Example #7
0
    def train(self, config, devices):
        """ Train the SEGAN.

        config: training configuration (epoch, save_path, save_freq,
            l1_remove_epoch, denoise_epoch, noise_decay, denoise_lbound).
        devices: list of devices the towers were built on; only its
            length is used here, to step the batch/step counters.
        """

        print('Initializing optimizers...')
        # init optimizers
        d_opt = self.d_opt
        g_opt = self.g_opt
        num_devices = len(devices)

        try:
            init = tf.global_variables_initializer()
        except AttributeError:
            # fall back to old implementation
            init = tf.initialize_all_variables()

        print('Initializing variables...')
        self.sess.run(init)
        # Generator-side summaries, merged into a single op below.
        g_summs = [self.d_fk_sum,
                   #self.d_nfk_sum,
                   self.d_fk_loss_sum,
                   #self.d_nfk_loss_sum,
                   self.g_loss_sum,
                   self.g_loss_l1_sum,
                   self.g_loss_adv_sum,
                   self.gen_summ,
                   self.gen_audio_summ]
        # if we have prelus, add them to summary
        if hasattr(self, 'alpha_summ'):
            g_summs += self.alpha_summ
        self.g_sum = tf.summary.merge(g_summs)
        self.d_sum = tf.summary.merge([self.d_loss_sum,
                                       self.d_rl_sum,
                                       self.d_rl_loss_sum,
                                       self.rl_audio_summ,
                                       self.real_w_summ,
                                       self.disc_noise_std_summ])

        if not os.path.exists(os.path.join(config.save_path, 'train')):
            os.makedirs(os.path.join(config.save_path, 'train'))

        self.writer = tf.summary.FileWriter(os.path.join(config.save_path,
                                                         'train'),
                                            self.sess.graph)

        # Input pipeline uses TF1 queue runners; the coordinator stops them.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print('Sampling some wavs to store sample references...')
        # Hang onto a copy of wavs so we can feed the same one every time
        # we store samples to disk for hearing
        # pick a single batch
        sample_noisy, sample_wav, \
        sample_z = self.sess.run([self.gtruth_noisy[0],
                                  self.gtruth_wavs[0],
                                  self.zs[0]])
        print('sample noisy shape: ', sample_noisy.shape)
        print('sample wav shape: ', sample_wav.shape)
        print('sample z shape: ', sample_z.shape)

        save_path = config.save_path
        counter = 0
        # count number of samples
        num_examples = 0
        for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
            num_examples += 1
        print('total examples in TFRecords {}: {}'.format(self.e2e_dataset,
                                                          num_examples))
        # last samples (those not filling a complete batch) are discarded
        # NOTE(review): '/' is true division on Python 3, so num_batches is
        # a float there; the >= comparison below still works — confirm the
        # intended interpreter (// would make the comment above accurate).
        num_batches = num_examples / self.batch_size

        print('Batches per epoch: ', num_batches)

        if self.load(self.save_path):
            print('[*] Load SUCCESS')
        else:
            print('[!] Load failed')
        batch_idx = 0
        curr_epoch = 0
        batch_timings = []
        d_fk_losses = []
        #d_nfk_losses = []
        d_rl_losses = []
        g_adv_losses = []
        g_l1_losses = []
        try:
            while not coord.should_stop():
                start = timeit.default_timer()
                # On save iterations, also fetch the merged summary ops.
                if counter % config.save_freq == 0:
                    for d_iter in range(self.disc_updates):
                        _d_opt, _d_sum, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt, self.d_sum,
                                                   self.d_fk_losses[0],
                                                   #self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                        if self.d_clip_weights:
                            # WGAN-style weight clipping after each D update.
                            self.sess.run(self.d_clip)
                        #d_nfk_loss, \

                    # now G iterations
                    _g_opt, _g_sum, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt, self.g_sum,
                                               self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
                else:
                    for d_iter in range(self.disc_updates):
                        _d_opt, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt,
                                                   self.d_fk_losses[0],
                                                   #self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                        #d_nfk_loss, \
                        if self.d_clip_weights:
                            self.sess.run(self.d_clip)

                    _g_opt, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt, self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
                end = timeit.default_timer()
                batch_timings.append(end - start)
                d_fk_losses.append(d_fk_loss)
                #d_nfk_losses.append(d_nfk_loss)
                d_rl_losses.append(d_rl_loss)
                g_adv_losses.append(g_adv_loss)
                g_l1_losses.append(g_l1_loss)
                print('{}/{} (epoch {}), d_rl_loss = {:.5f}, '
                      'd_fk_loss = {:.5f}, '#d_nfk_loss = {:.5f}, '
                      'g_adv_loss = {:.5f}, g_l1_loss = {:.5f},'
                      ' time/batch = {:.5f}, '
                      'mtime/batch = {:.5f}'.format(counter,
                                                    config.epoch * num_batches,
                                                    curr_epoch,
                                                    d_rl_loss,
                                                    d_fk_loss,
                                                    #d_nfk_loss,
                                                    g_adv_loss,
                                                    g_l1_loss,
                                                    end - start,
                                                    np.mean(batch_timings)))
                batch_idx += num_devices
                counter += num_devices
                # NOTE(review): '/' is float division on Python 3; the
                # modulo test relies on counter being a multiple of
                # num_devices — confirm, or use '//'.
                if (counter / num_devices) % config.save_freq == 0:
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    # Re-feed the reference batch captured above so the
                    # stored samples are comparable across checkpoints.
                    fdict = {self.gtruth_noisy[0]:sample_noisy,
                             self.zs[0]:sample_z}
                    canvas_w = self.sess.run(self.Gs[0],
                                             feed_dict=fdict)
                    swaves = sample_wav
                    sample_dif = sample_wav - sample_noisy
                    for m in range(min(20, canvas_w.shape[0])):
                        print('w{} max: {} min: {}'.format(m,
                                                           np.max(canvas_w[m]),
                                                           np.min(canvas_w[m])))
                        # NOTE(review): 16e3 is a float sample rate; scipy's
                        # wavfile.write expects an int — confirm it works on
                        # the pinned scipy version.
                        wavfile.write(os.path.join(save_path,
                                                   'sample_{}-'
                                                   '{}.wav'.format(counter, m)),
                                      16e3,
                                      de_emph(canvas_w[m],
                                              self.preemph))
                        m_gtruth_path = os.path.join(save_path, 'gtruth_{}.'
                                                                'wav'.format(m))
                        # Ground-truth/noisy/diff references only need to be
                        # written once; skip if they already exist.
                        if not os.path.exists(m_gtruth_path):
                            wavfile.write(os.path.join(save_path,
                                                       'gtruth_{}.'
                                                       'wav'.format(m)),
                                          16e3,
                                          de_emph(swaves[m],
                                                  self.preemph))
                            wavfile.write(os.path.join(save_path,
                                                       'noisy_{}.'
                                                       'wav'.format(m)),
                                          16e3,
                                          de_emph(sample_noisy[m],
                                                  self.preemph))
                            wavfile.write(os.path.join(save_path,
                                                       'dif_{}.wav'.format(m)),
                                          16e3,
                                          de_emph(sample_dif[m],
                                                  self.preemph))
                        # NOTE(review): these savetxt calls rewrite the same
                        # files once per sample m; hoisting them out of the
                        # loop would do the work once.
                        np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
                                   d_rl_losses)
                        np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
                                   d_fk_losses)
                        np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
                                   g_adv_losses)
                        np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
                                   g_l1_losses)

                if batch_idx >= num_batches:
                    curr_epoch += 1
                    # re-set batch idx
                    batch_idx = 0
                    # check if we have to deactivate L1
                    if curr_epoch >= config.l1_remove_epoch and self.deactivated_l1 == False:
                        print('** Deactivating L1 factor! **')
                        self.sess.run(tf.assign(self.l1_lambda, 0.))
                        self.deactivated_l1 = True
                    # check if we have to start decaying noise (if any)
                    if curr_epoch >= config.denoise_epoch and self.deactivated_noise == False:
                        # apply noise std decay rate
                        decay = config.noise_decay
                        if not hasattr(self, 'curr_noise_std'):
                            self.curr_noise_std = self.init_noise_std
                        new_noise_std = decay * self.curr_noise_std
                        if new_noise_std < config.denoise_lbound:
                            print('New noise std {} < lbound {}, setting 0.'.format(new_noise_std, config.denoise_lbound))
                            print('** De-activating noise layer **')
                            # if it's lower than the lower bound, cancel noise completely
                            new_noise_std = 0.
                            self.deactivated_noise = True
                        else:
                            print('Applying decay {} to noise std {}: {}'.format(decay, self.curr_noise_std, new_noise_std))
                        self.sess.run(tf.assign(self.disc_noise_std, new_noise_std))
                        self.curr_noise_std = new_noise_std
                if curr_epoch >= config.epoch:
                    # done training
                    # NOTE(review): the loop condition uses config.epoch but
                    # the message prints self.epoch — confirm they match.
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    break
        except tf.errors.OutOfRangeError:
            print('Done training; epoch limit {} reached.'.format(self.epoch))
        finally:
            # Always stop the queue-runner threads, even on error.
            coord.request_stop()
        coord.join(threads)