def convert_linear_spectrogram_to_audio(self, spec, Ts=None):
    """Turn a batch of linear spectrograms into a zero-padded batch of waveforms.

    Each item is cropped along its third axis to its own frame count, moved to
    the CPU as a NumPy array, and handed to the ``griffin_lim`` helper together
    with ``self.n_iters`` and ``self.n_fft``.

    Args:
        spec: Tensor indexed as ``spec[item, :, frame]``; axis 0 is the batch
            and axis 2 is the frame axis.
        Ts: Optional sequence with one frame count per batch item. When
            ``None``, every item uses the full length of axis 2.

    Returns:
        A ``torch.zeros`` tensor of shape
        ``(batch, (max(Ts) - 1) * self.l_hop)`` whose rows have their leading
        samples overwritten by the reconstructed audio; the rest stays zero.
    """
    n_items, full_frames = spec.shape[0], spec.shape[2]
    if Ts is None:
        Ts = [full_frames] * n_items

    # The longest item determines the padded width of the output batch.
    padded_len = (max(Ts) - 1) * self.l_hop
    audios = torch.zeros(n_items, padded_len)

    # Griffin-Lim is run item by item on NumPy data; a native PyTorch
    # implementation would avoid this CPU round trip.
    for idx in range(n_items):
        magnitudes = spec[idx, :, : Ts[idx]].cpu().numpy()
        waveform = griffin_lim(magnitudes, n_iters=self.n_iters, n_fft=self.n_fft)
        audios[idx, : waveform.shape[0]] = torch.from_numpy(waveform)

    return audios
def validation_step(self, batch, batch_idx):
    """Compute the validation losses, and optionally STOI/PESQ scores, for one batch.

    Args:
        batch: A 7-element sequence. Only the elements at positions 1 (target
            spectrogram), 4 (passed as the length argument to the loss
            functions) and 6 (per-item audio file paths) are used here.
        batch_idx: Index of the batch; not used by this method.

    Returns:
        A dict containing ``'val_loss'``, ``'loss_L1'`` and ``'loss_reg'``, one
        ``'loss_{k}_{s}'`` entry per pair in ``self.f_specs``, and -- when
        ``self._cfg.train_params.validate_scores`` is truthy -- the batch
        averages ``'stoi_real'``, ``'pesq_real'``, ``'stoi_est'`` and
        ``'pesq_est'``. ``'val_loss'`` is the L1 loss plus
        ``self.lreg_factor`` times the regularisation loss plus every
        smoothing loss.
    """
    _, y_spec, _, _, T_ys, _, path_speech = batch

    # Target spectrogram -> mel -> predicted spectrogram -> mel again.
    x_mel = self.ed_mel2spec.spec_to_mel(y_spec)
    x_spec = self(mel=x_mel)
    z_mel = self.ed_mel2spec.spec_to_mel(x_spec)

    loss_L1 = self.calc_loss(x_spec, y_spec, T_ys, self.criterion)
    loss_reg = self.calc_loss(x_mel, z_mel, T_ys, self.criterion)
    total = loss_L1 + self.lreg_factor * loss_reg
    output = dict(val_loss=total, loss_L1=loss_L1, loss_reg=loss_reg)

    if self._cfg.train_params.validate_scores:
        # For validation, estimate the waveforms with standard Griffin-Lim and
        # score the prediction against both the real recording and the
        # Griffin-Lim reconstruction of the target spectrogram.
        n_items = x_spec.shape[0]
        pred_specs = x_spec.to('cpu').numpy()
        target_specs = y_spec.to('cpu').numpy()
        totals = {'stoi_real': 0.0, 'pesq_real': 0.0, 'stoi_est': 0.0, 'pesq_est': 0.0}

        for p in range(n_items):
            real_wav = sf.read(path_speech[p])[0].astype(np.float32)
            target_gl_wav = griffin_lim(target_specs[p, 0, :, :])
            pred_gl_wav = griffin_lim(pred_specs[p, 0, :, :])

            # Trim all three signals to their common length before scoring.
            common_len = min(real_wav.shape[0], pred_gl_wav.shape[0], target_gl_wav.shape[0])
            real_wav = real_wav[:common_len, ...]
            target_gl_wav = target_gl_wav[:common_len, ...]
            pred_gl_wav = pred_gl_wav[:common_len, ...]

            against_real = eval_tts_scores(pred_gl_wav, real_wav)
            totals['stoi_real'] += torch.tensor(against_real['STOI'])
            totals['pesq_real'] += torch.tensor(against_real['PESQ'])

            against_est = eval_tts_scores(pred_gl_wav, target_gl_wav)
            totals['stoi_est'] += torch.tensor(against_est['STOI'])
            totals['pesq_est'] += torch.tensor(against_est['PESQ'])

        for score_name, score_sum in totals.items():
            output[score_name] = score_sum / n_items

    for k, s in self.f_specs:
        smooth_loss = self.calc_loss_smooth(x_spec, y_spec, T_ys, k, s)
        output[f'loss_{k}_{s}'] = smooth_loss
        # Out-of-place add on purpose: an in-place update could modify a
        # tensor that is already stored in ``output``.
        total = total + smooth_loss
    output['val_loss'] = total

    return output