def mod_power(self, cvmcep, rmcep, alpha=0.42, irlen=1024):
    """Power modification based on impulse response.

    Matches the power (0th mel-cepstral coefficient) of the converted
    mel-cepstrum to that of the reference mel-cepstrum, frame by frame.

    Parameters
    ----------
    cvmcep : array, shape (`T`, `dim`)
        array of converted mel-cepstrum
    rmcep : array, shape (`T`, `dim`)
        array of reference mel-cepstrum
    alpha : float, optional
        All-pass filter constant (frequency-warping parameter)
        Default set to 0.42
    irlen : int, optional
        Length of the impulse response used for energy calculation
        Default set to 1024

    Return
    ------
    modified_cvmcep : array, shape (`T`, `dim`)
        array of power modified converted mel-cepstrum

    Raises
    ------
    ValueError
        If `cvmcep` and `rmcep` have different shapes.
    """
    if rmcep.shape != cvmcep.shape:
        # Implicit concatenation keeps the message on one clean line
        # (a backslash continuation here would embed stray whitespace).
        raise ValueError(
            "The shapes of the converted and reference mel-cepstrum "
            "are different: {} / {}".format(cvmcep.shape, rmcep.shape))

    # Per-frame energies of both sequences via impulse-response synthesis.
    cv_e = pysptk.mc2e(cvmcep, alpha=alpha, irlen=irlen)
    r_e = pysptk.mc2e(rmcep, alpha=alpha, irlen=irlen)

    # Half the log energy ratio is the additive correction for c0,
    # since energy scales with exp(2 * c0).
    dpow = np.log(r_e / cv_e) / 2

    modified_cvmcep = np.copy(cvmcep)
    modified_cvmcep[:, 0] += dpow

    return modified_cvmcep
def mod_p(m, r, alpha=0.42, irlen=256):
    """Match the power of mel-cepstrum `m` to reference mel-cepstrum `r`.

    Adds half the log energy ratio to the 0th coefficient of `m`, the
    same scheme used by the other power-modification helpers in this file.

    Parameters
    ----------
    m : array, shape (`T`, `dim`)
        converted mel-cepstrum to be modified
    r : array, shape (`T`, `dim`)
        reference mel-cepstrum
    alpha : float, optional
        All-pass filter constant. Default 0.42 (previously hard-coded).
    irlen : int, optional
        Impulse-response length for energy calculation.
        Default 256 (previously hard-coded).

    Return
    ------
    t_m : array, shape (`T`, `dim`)
        power-modified copy of `m`

    Raises
    ------
    ValueError
        If `m` and `r` have different shapes.
    """
    # Guard against shape mismatch, consistent with mod_power.
    if r.shape != m.shape:
        raise ValueError(
            "The shapes of the converted and reference mel-cepstrum "
            "are different: {} / {}".format(m.shape, r.shape))

    m1 = pysptk.mc2e(m, alpha=alpha, irlen=irlen)
    r1 = pysptk.mc2e(r, alpha=alpha, irlen=irlen)

    # Half log energy ratio == additive c0 correction.
    p = np.log(r1 / m1) / 2

    t_m = np.copy(m)
    t_m[:, 0] = t_m[:, 0] + p
    return t_m
def mod_pow(cvmcep, mcep, alpha=MCEP_ALPHA, irlen=IRLEN):
    """Return a power-corrected copy of `cvmcep`.

    The 0th mel-cepstral coefficient of the converted sequence is shifted
    by half the log ratio of the reference energy to the converted energy,
    so the output matches the per-frame power of `mcep`.

    Parameters
    ----------
    cvmcep : array, shape (`T`, `dim`)
        converted mel-cepstrum
    mcep : array, shape (`T`, `dim`)
        reference mel-cepstrum
    alpha : float, optional
        all-pass constant passed through to `ps.mc2e`
    irlen : int, optional
        impulse-response length passed through to `ps.mc2e`

    Return
    ------
    array, shape (`T`, `dim`)
        power-modified converted mel-cepstrum
    """
    # Per-frame energies of the converted and reference sequences.
    converted_energy = ps.mc2e(cvmcep, alpha=alpha, irlen=irlen)
    reference_energy = ps.mc2e(mcep, alpha=alpha, irlen=irlen)

    # Additive c0 correction: half the log energy ratio.
    power_offset = np.log(reference_energy / converted_energy) / 2

    corrected = np.copy(cvmcep)
    corrected[:, 0] = corrected[:, 0] + power_offset
    return corrected
def mod_power(cvmcep, rmcep, alpha=0.42, irlen=1024):
    """Power modification based on impulse response.

    Shifts the 0th mel-cepstral coefficient of `cvmcep` so its per-frame
    power matches that of `rmcep`.

    Parameters
    ----------
    cvmcep : array, shape (`T`, `dim`)
        array of converted mel-cepstrum
    rmcep : array, shape (`T`, `dim`)
        array of reference mel-cepstrum
    alpha : float, optional
        All-pass filter constant. Default 0.42 (the value used by the
        other power-modification helpers; previously a required argument).
    irlen : int, optional
        Impulse-response length for energy calculation. Default 1024.

    Return
    ------
    modified_cvmcep : array, shape (`T`, `dim`)
        array of power modified converted mel-cepstrum

    Raises
    ------
    ValueError
        If `cvmcep` and `rmcep` have different shapes.
    """
    if rmcep.shape != cvmcep.shape:
        # Implicit concatenation avoids the stray whitespace that a
        # backslash-continued string literal embeds in the message.
        raise ValueError(
            "The shapes of the converted and reference mel-cepstrum "
            "are different: {} / {}".format(cvmcep.shape, rmcep.shape))

    cv_e = pysptk.mc2e(cvmcep, alpha=alpha, irlen=irlen)
    r_e = pysptk.mc2e(rmcep, alpha=alpha, irlen=irlen)

    # Half log energy ratio == additive c0 correction.
    dpow = np.log(r_e / cv_e) / 2

    modified_cvmcep = np.copy(cvmcep)
    modified_cvmcep[:, 0] += dpow

    return modified_cvmcep
def mod_power(cvmcep, rmcep, alpha=0.42, irlen=1024):
    """Power modification based on impulse response.

    Matches the power (0th mel-cepstral coefficient) of the converted
    mel-cepstrum to that of the reference mel-cepstrum, frame by frame.

    Parameters
    ----------
    cvmcep : array, shape (`T`, `dim`)
        array of converted mel-cepstrum
    rmcep : array, shape (`T`, `dim`)
        array of reference mel-cepstrum
    alpha : float, optional
        All-pass filter constant (frequency-warping parameter)
        Default set to 0.42
    irlen : int, optional
        Length of the impulse response used for energy calculation
        Default set to 1024

    Return
    ------
    modified_cvmcep : array, shape (`T`, `dim`)
        array of power modified converted mel-cepstrum

    Raises
    ------
    ValueError
        If `cvmcep` and `rmcep` have different shapes.
    """
    if rmcep.shape != cvmcep.shape:
        # Implicit concatenation keeps the message on one clean line
        # (a backslash continuation here would embed stray whitespace).
        raise ValueError(
            "The shapes of the converted and reference mel-cepstrum "
            "are different: {} / {}".format(cvmcep.shape, rmcep.shape))

    # Per-frame energies of both sequences via impulse-response synthesis.
    cv_e = pysptk.mc2e(cvmcep, alpha=alpha, irlen=irlen)
    r_e = pysptk.mc2e(rmcep, alpha=alpha, irlen=irlen)

    # Half the log energy ratio is the additive correction for c0,
    # since energy scales with exp(2 * c0).
    dpow = np.log(r_e / cv_e) / 2

    modified_cvmcep = np.copy(cvmcep)
    modified_cvmcep[:, 0] += dpow

    return modified_cvmcep
def forward(self, audio, feat_kinds=('sp', 'mcc', 'f0', 'ap', 'en')):
    """Computes WORLD features from a batch of waves.

    PARAMS
    ------
    audio: torch.FloatTensor of shape (B, T) in range [-1, 1]
        (iterated per-item, so the leading dim is the batch)
    feat_kinds: sequence of feature names to extract.
        Default changed from a list literal to a tuple: a mutable default
        argument is shared across calls; membership tests (`in`) behave
        identically for a tuple.

    RETURNS
    -------
    feat: dict mapping each requested feature name to a torch tensor
        stacked over the batch and moved back to `audio`'s device.
        Contains features in "feat_kinds": SP, MCC, F0, AP, energy.
    """
    # Remember the device so results can be moved back after CPU-only
    # WORLD/SPTK processing.
    device = audio.device
    audio = audio.detach().cpu().numpy()

    feat = dict()
    for feat_kind in feat_kinds:
        feat[feat_kind] = list()

    for x in audio:
        # Preprocess: rescale to int-range amplitude and high-pass below
        # the configured cutoff before analysis.
        x = x * MAX_WAV_VALUE
        x = self.low_cut_filter(x, cutoff=self.cutoff_freq)

        # Extract f0 (Harvest). time_axis is reused by the spectral
        # analyses below so all features share frame positions.
        f0, time_axis = pyworld.harvest(
            x, self.fs,
            f0_floor=self.minf0,
            f0_ceil=self.maxf0,
            frame_period=self.shiftms)

        # Extract sp — always computed, since 'mcc' and 'en' derive from it.
        sp = pyworld.cheaptrick(x, f0, time_axis, self.fs,
                                fft_size=self.fft_size)
        if 'sp' in feat_kinds:
            # Stored transposed: (freq_bins, frames).
            feat['sp'].append(torch.from_numpy(sp).float().t())

        # Extract ap (aperiodicity) only on demand — d4c is not needed
        # by any other feature.
        if 'ap' in feat_kinds:
            ap = pyworld.d4c(x, f0, time_axis, self.fs,
                             fft_size=self.fft_size)
            feat['ap'].append(torch.from_numpy(ap).float().t())

        # Extract mcc (mel-cepstrum from the spectral envelope).
        if 'mcc' in feat_kinds:
            mcc = pysptk.sp2mc(sp, self.mcc_dim, self.mcc_alpha)
            feat['mcc'].append(torch.from_numpy(mcc).float().t())

        # Extract energy from the mel-cepstrum. Recomputed even when
        # 'mcc' was requested — keeps the branches independent.
        if 'en' in feat_kinds:
            mcc = pysptk.sp2mc(sp, self.mcc_dim, self.mcc_alpha)
            en = pysptk.mc2e(mcc, alpha=self.mcc_alpha, irlen=256)
            # en = np.clip(en, 1e-10, None)
            feat['en'].append(torch.from_numpy(en).float().view(-1))

        # Fix f0: clamp spurious negative values to unvoiced (0).
        if 'f0' in feat_kinds:
            f0[f0 < 0] = 0
            feat['f0'].append(torch.from_numpy(f0).float().view(-1))

    # Stack per-item tensors along a new batch dim and restore device.
    for key, val_list in feat.items():
        feat[key] = torch.cat(
            [val.unsqueeze(0) for val in val_list], dim=0).to(device)

    return feat
def test_mc2b():
    # NOTE(review): the name says mc2b, but the body exercises pysptk.mcep
    # and pysptk.mc2e — likely a copy-paste name; confirm intent before
    # renaming (renaming would change test discovery).
    # Smoke test: energy of a mel-cepstrum from windowed dummy data must
    # be strictly positive.
    x = windowed_dummy_data(1024)
    mc = pysptk.mcep(x)
    assert pysptk.mc2e(mc) > 0