def __getitem__(self, key):
    """Index the feature along its first (frame) axis.

    Parameters
    ----------
    key
        An integer frame index (Python ``int`` or any ``numbers.Integral``
        such as a NumPy integer scalar), or any other index object accepted
        by the underlying arrays (e.g. a slice).

    Returns
    -------
    tuple or feature
        For an integer ``key``: the tuple
        ``(f0, spectrum_envelope, aperiodicity, mel_cepstrum)`` of that
        frame.  For any other key: a new object created with
        ``kwiiyatta.feature(self)`` whose arrays are this object's arrays
        indexed by ``key``.
    """
    # Function-scope import so the block does not rely on file-level imports.
    import numbers

    f0 = self.f0[key]
    spec = self.spectrum_envelope[key]
    ape = self.aperiodicity[key]

    is_voiced = self._is_voiced
    if is_voiced is not None:
        is_voiced = is_voiced[key]

    # Only index the mel-cepstrum eagerly when its data is already present.
    mcep = None
    if self._mel_cepstrum.data is not None:
        mcep = self.mel_cepstrum.data[key]

    # ``numbers.Integral`` also matches NumPy integer scalars, which a plain
    # ``isinstance(key, int)`` check rejects on Python 3.
    if isinstance(key, numbers.Integral):
        if mcep is None:
            # Single-frame access always returns a mel-cepstrum row.
            mcep = self.mel_cepstrum.data[key]
        return f0, spec, ape, mcep

    result = kwiiyatta.feature(self)
    result._f0 = f0
    result._spectrum_envelope = spec
    result._aperiodicity = ape
    if is_voiced is not None:
        result._is_voiced = is_voiced
    if mcep is not None:
        result._mel_cepstrum.data = mcep
    return result
def test_mlsa_filter(fs1, fs2):
    """Apply a differential mel-cepstrum MLSA filter to the CLB waveform and
    compare the re-analyzed result with a CLB feature that carries the
    aligned SLT spectrum envelope.
    """
    np.random.seed(0)
    carrier = feature.get_analyzer(
        dataset.get_wav_path(dataset.CLB_WAV, fs=fs1))
    target = feature.get_analyzer(
        dataset.get_wav_path(dataset.SLT_WAV, fs=fs2))
    target_aligned = kwiiyatta.align(target, carrier)

    # Differential mel-cepstrum: aligned target minus carrier (at target fs).
    diff_mcep = copy.copy(target_aligned.mel_cepstrum)
    diff_mcep.data = (diff_mcep.data
                      - carrier.mel_cepstrum.resample_data(target.fs))
    filtered = kwiiyatta.apply_mlsa_filter(carrier.wavdata, diff_mcep)

    expected = kwiiyatta.feature(carrier)
    if carrier.fs <= target.fs:
        expected.spectrum_envelope = \
            target_aligned.resample_spectrum_envelope(carrier.fs)
        feature.override_spectrum_power(expected, carrier)
    else:
        # Only the lower bins are replaced; the remaining bins stay the
        # carrier's own envelope.
        band = carrier.spectrum_len * target.fs // carrier.fs
        lower = feature.override_power(
            target_aligned.reshaped_spectrum_envelope(band),
            carrier.spectrum_envelope[:, :band]
        )
        upper = carrier.spectrum_envelope[:, band:]
        expected.spectrum_envelope = np.hstack((lower, upper))

    actual = kwiiyatta.Analyzer(filtered)
    diffs = feature.calc_feature_diffs(expected, actual)
    f0_err, spec_err, ape_err, mcep_err = diffs
    assert_any.between(0.051, f0_err, 0.078)
    assert_any.between(0.32, spec_err, 0.55)
    assert_any.between(0.039, ape_err, 0.073)
    assert_any.between(0.038, mcep_err, 0.088)
def create_silence_feature(cls, frame_len, fs, **kwargs):
    """Create a feature filled from this class's ``silence_*`` helpers.

    ``kwargs`` are forwarded to ``kwiiyatta.feature``; ``Synthesizer``
    defaults to ``cls`` when the caller does not supply it.
    """
    if 'Synthesizer' not in kwargs:
        kwargs['Synthesizer'] = cls
    silence = kwiiyatta.feature(fs, **kwargs)
    silence.f0 = cls.silence_f0(frame_len, fs)
    silence.spectrum_envelope = cls.silence_spectrum_envelope(frame_len, fs)
    silence.aperiodicity = cls.silence_aperiodicity(frame_len, fs)
    return silence
def test_reshape(check):
    """Exercise ``kwiiyatta.reshape`` and the ``reshaped_*`` accessors between
    a default analyzer and one analyzed with a larger FFT size.
    """
    from kwiiyatta.vocoder.world import WorldAnalyzer

    base = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    wide = WorldAnalyzer.load_wav(dataset.CLB_WAV)
    wide_fft_size = (base.spectrum_len - 1) * 2 * 2
    wide.extract_spectrum_envelope(fft_size=wide_fft_size)
    wide.extract_aperiodicity(fft_size=wide_fft_size)
    assert base.spectrum_len != wide.spectrum_len

    # reshape() must agree exactly with the per-array reshaped_* accessors.
    wide_reshaped = kwiiyatta.reshape(wide, base.spectrum_len)
    assert wide_reshaped.spectrum_len == base.spectrum_len
    wide_feature = kwiiyatta.feature(wide)
    assert wide_feature.spectrum_len != wide_reshaped.spectrum_len
    exact_spec = feature.calc_powered_diff(
        wide_reshaped.spectrum_envelope,
        wide_feature.reshaped_spectrum_envelope(base.spectrum_len))
    assert exact_spec == 0
    exact_ape = feature.calc_diff(
        wide_reshaped.aperiodicity,
        wide_feature.reshaped_aperiodicity(base.spectrum_len))
    assert exact_ape == 0

    _, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(base, wide_reshaped)
    check.round_equal(0.0025, spec_err)
    check.round_equal(0.00087, ape_err)
    check.round_equal(0.0012, mcep_err)

    # Clear the cached arrays on the base analyzer before reshaping it.
    base._spectrum_envelope = None
    base._aperiodicity = None
    spec_err = feature.calc_powered_diff(
        wide.spectrum_envelope,
        base.reshaped_spectrum_envelope(wide_feature.spectrum_len))
    check.round_equal(0.0038, spec_err)
    ape_err = feature.calc_diff(
        wide.aperiodicity,
        base.reshaped_aperiodicity(wide_feature.spectrum_len))
    check.round_equal(0.00088, ape_err)

    # Same comparison with a feature that keeps only its mel-cepstrum.
    base_feature = kwiiyatta.feature(base)
    base_feature.extract_mel_cepstrum()
    base_feature.spectrum_envelope = None
    spec_err = feature.calc_powered_diff(
        wide.spectrum_envelope,
        base_feature.reshaped_spectrum_envelope(wide.spectrum_len))
    check.round_equal(0.090, spec_err)
def convert(conf, converter, src_path, diffvc=True):
    """Convert the wav at ``src_path`` with ``converter`` and return the wav.

    With ``diffvc`` true the converted mel-cepstrum is applied through
    ``kwiiyatta.apply_mlsa_filter``; otherwise it replaces the source
    feature's mel-cepstrum and the feature is synthesized.
    """
    source = conf.create_analyzer(src_path, Analyzer=kwiiyatta.analyze_wav)
    converted = converter.convert(source.mel_cepstrum, diff=diffvc)

    if diffvc:
        return kwiiyatta.apply_mlsa_filter(source, converted)

    synth_feature = kwiiyatta.feature(source)
    synth_feature.mel_cepstrum = converted
    return synth_feature.synthesize()
def make_expected_feature(wavpath, fs=16000, fullset=False):
    """Return a CLB feature carrying the aligned SLT spectrum envelope, with
    its spectrum power overridden from the CLB source.
    """
    def load(speaker_dir):
        # Analyzer for ``wavpath`` under the given speaker directory.
        return feature.get_analyzer(
            dataset.get_wav_path(speaker_dir / wavpath, fullset, fs=fs))

    source = load(dataset.CLB_DIR)
    target = load(dataset.SLT_DIR)
    target_aligned = kwiiyatta.align(target, source)

    result = kwiiyatta.feature(source)
    result.spectrum_envelope = target_aligned.spectrum_envelope
    feature.override_spectrum_power(result, source)
    return result
def test_resample_down(fs1, fs2, wavfile, frame_period):
    """Resample the higher-rate analysis down to the lower rate and compare
    it with a direct analysis at the lower rate.
    """
    low_fs, high_fs = (fs2, fs1) if fs2 < fs1 else (fs1, fs2)
    low = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=low_fs), frame_period=frame_period)
    high = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=high_fs), frame_period=frame_period)

    high_down = kwiiyatta.resample(high, low_fs)
    # Clear the cached arrays on the source analyzer after resampling.
    high._spectrum_envelope = None
    high._aperiodicity = None
    high._mel_cepstrum.data = None

    assert low.fs == high_down.fs
    exact_mcep = feature.calc_diff(
        high_down.mel_cepstrum.data,
        high.resample_mel_cepstrum(high_down.fs).data)
    assert exact_mcep == 0
    assert feature.calc_diff(high_down.f0, high.f0) == 0
    exact_spec = feature.calc_powered_diff(
        high_down.spectrum_envelope,
        high.resample_spectrum_envelope(high_down.fs))
    assert exact_spec == 0
    exact_ape = feature.calc_diff(
        high_down.aperiodicity,
        high.resample_aperiodicity(high_down.fs))
    assert exact_ape == 0
    assert high.mel_cepstrum.order == high_down.mel_cepstrum.order

    # Resampled feature versus the direct low-rate analysis.
    f0_err, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(low, high_down)
    assert_any.between(0.0012, f0_err, 0.014)
    assert_any.between(0.0025, spec_err, 0.0094)
    assert_any.between(0.0015, ape_err, 0.048)
    assert_any.between(0.011, mcep_err, 0.031)

    # Synthesize the resampled feature and analyze it again.
    resynth_wav = high_down.synthesize()
    resynth = kwiiyatta.Analyzer(resynth_wav, frame_period=frame_period)
    f0_err, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(low, resynth)
    assert_any.between(0.055, f0_err, 0.11)
    assert_any.between(0.20, spec_err, 0.23)
    assert_any.between(0.072, ape_err, 0.10)
    assert_any.between(0.038, mcep_err, 0.056)

    # Feature that keeps only its mel-cepstrum.
    high_feature = kwiiyatta.feature(high)
    high_feature.extract_mel_cepstrum()
    high_feature.spectrum_envelope = None
    feature_mcep_down = high_feature.resample_mel_cepstrum(low.fs)
    mcep_err = feature.calc_diff(low.mel_cepstrum.data,
                                 feature_mcep_down.data)
    assert_any.between(0.014, mcep_err, 0.041)

    analyzer_mcep_down = kwiiyatta.resample(high.mel_cepstrum, low.fs)
    assert feature.calc_diff(analyzer_mcep_down.data,
                             feature_mcep_down.data) == 0
def function(self, feature, key):
    """Return the mel-cepstrum data of ``feature`` without its first column.

    The first feature seen fixes ``self.order`` and ``self.fs``; later
    features are brought to that order and resampled to that rate when they
    differ.  ``key`` is not used.
    """
    work = kwiiyatta.feature(feature)

    if self.order is None:
        self.order = work.mel_cepstrum_order
    elif self.order != feature.mel_cepstrum_order:
        work.mel_cepstrum_order = self.order

    native = work.mel_cepstrum.data
    if self.fs is not None and self.fs != work.fs:
        resampled = work.resample_mel_cepstrum(self.fs).data
        return resampled[:, 1:]  # Drop 1st (power) dimension
    if self.fs is None:
        self.fs = work.fs
    return native[:, 1:]  # Drop 1st (power) dimension
def test_mcep_dataset():
    """Check that ``MelCepstrumDataset`` yields data matching the reference
    feature for entries with a different order or sampling rate.
    """
    reference = kwiiyatta.feature(feature.get_analyzer(dataset.CLB_WAV))

    as_is = copy.copy(reference)
    higher_order = copy.copy(reference)
    higher_order.mel_cepstrum_order = 32
    other_fs = copy.copy(reference)
    other_fs.mel_cepstrum._fs = 44100
    base = {
        'order24': as_is,
        'order32': higher_order,
        'fs44': other_fs,
    }

    mcep_dataset = MelCepstrumDataset(base)
    # Access the unmodified entry first, before the comparisons below.
    mcep_dataset['order24']

    expected_order = reference.mel_cepstrum.data[:, 1:]
    assert (expected_order == mcep_dataset['order32']).all()
    expected_fs = base['fs44'].resample_mel_cepstrum(16000).data[:, 1:]
    assert (expected_fs == mcep_dataset['fs44']).all()
def test_feature():
    """Check feature equality against its source analyzer and indexing.

    Each section perturbs one attribute of the copy, asserts inequality,
    restores the attribute and asserts equality again.
    """
    a = feature.get_analyzer(dataset.CLB_WAV)
    f = kwiiyatta.feature(a)
    assert f == a

    # Sampling rate of the mel-cepstrum takes part in the comparison.
    f._mel_cepstrum._fs *= 2
    assert f != a
    f._mel_cepstrum._fs = a.fs
    assert f == a

    # So does its frame period.
    f._mel_cepstrum._frame_period *= 2
    assert f != a
    f._mel_cepstrum._frame_period = a.frame_period
    assert f == a

    # A missing f0 makes the feature unequal.
    f.f0 = None
    assert f != a
    f.f0 = a.f0

    # Copy the envelope first so the in-place edit below does not touch
    # the analyzer's own array.
    f.spectrum_envelope = copy.copy(a.spectrum_envelope)
    assert f == a
    f.spectrum_envelope[0][0] += 0.001
    assert f != a
    f.spectrum_envelope[0][0] = a.spectrum_envelope[0][0]

    # Same for aperiodicity.
    f.aperiodicity = copy.copy(f.aperiodicity)
    assert f == a
    f.aperiodicity[-1][-1] += 0.001
    assert f != a
    f.aperiodicity[-1][-1] = a.aperiodicity[-1][-1]
    assert f == a

    # Integer indexing returns the per-frame tuple.
    half = len(a.f0) // 2
    f0, spec, ape, mcep = a[half]
    assert f0 == a.f0[half]
    assert (spec == a.spectrum_envelope[half]).all()
    assert (ape == a.aperiodicity[half]).all()
    assert (mcep == a.mel_cepstrum.data[half]).all()

    # Slice indexing returns an object exposing the sliced arrays.
    f = a[:half]
    assert len(f.f0) == len(f.spectrum_envelope) == len(f.aperiodicity) == half
    assert (f.f0 == a.f0[:half]).all()
    assert (f.spectrum_envelope == a.spectrum_envelope[:half]).all()
    assert (f.aperiodicity == a.aperiodicity[:half]).all()
    assert (f.mel_cepstrum.data == a.mel_cepstrum.data[:half]).all()
def test_mcep_converter():
    """Check order validation and resampling in ``MelCepstrumFeatureConverter``.

    The converter is trained on an unmodified copy of the feature.  It must
    then raise ``ValueError`` for a mel-cepstrum of a different order, and
    return a result at 16000 Hz for a mel-cepstrum labelled 44100 Hz.
    """
    a = kwiiyatta.feature(feature.get_analyzer(dataset.CLB_WAV))
    base = {'key': copy.copy(a)}
    mcep_converter = MelCepstrumFeatureConverter(NopConverter())
    mcep_converter.train(base, ['key'])

    # Prepare the mismatching input outside ``pytest.raises`` so that only
    # the ``convert`` call itself can satisfy the expected exception.
    a.mel_cepstrum_order = 32
    mismatched = a.mel_cepstrum
    with pytest.raises(ValueError) as e:
        mcep_converter.convert(mismatched)
    assert 'order is expected to 24 but 32' == str(e.value)

    # Restore the order, then relabel the sampling rate.
    a.mel_cepstrum_order = 24
    mcep = a.mel_cepstrum
    mcep._fs = 44100
    mcep_converter.expected_convert = \
        a.resample_mel_cepstrum(16000).data[:, 1:]
    assert mcep_converter.convert(mcep).fs == 16000
def test_reanalyze(wavfile, dtype, fs, frame_period):
    """Synthesize from an analyzer and from its feature copy, require the
    same waveform, then re-analyze it and compare with the original.
    """
    wav_path = dataset.get_wav_path(wavfile, dtype=dtype, fs=fs)
    original = feature.get_analyzer(wav_path, frame_period=frame_period)
    assert original.fs == fs

    from_analyzer = original.synthesize()
    from_feature = kwiiyatta.feature(original).synthesize()
    assert from_analyzer.fs == from_feature.fs
    assert (from_analyzer.data == from_feature.data).all()

    reanalyzed = kwiiyatta.Analyzer(from_analyzer, frame_period=frame_period)
    f0_err, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(original, reanalyzed)
    assert_any.between(0.052, f0_err, 0.094)
    assert_any.between(0.20, spec_err, 0.22)
    assert_any.between(0.063, ape_err, 0.096)
    assert_any.between(0.030, mcep_err, 0.055)
def pad_silence(feature, frame_len):
    """Return a copy of ``feature`` with ``frame_len`` frames from the
    Synthesizer's ``silence_*`` helpers added before and after f0, spectrum
    envelope and aperiodicity.
    """
    def surround(make_silence, frames):
        # One fresh silence block on each side of ``frames``.
        return np.concatenate((make_silence(), frames, make_silence()))

    padded = kwiiyatta.feature(feature)
    synth = feature.Synthesizer

    padded.f0 = surround(
        lambda: synth.silence_f0(frame_len, feature.fs),
        feature.f0)
    padded.spectrum_envelope = surround(
        lambda: synth.silence_spectrum_envelope(
            frame_len, feature.fs, feature.spectrum_len),
        feature.spectrum_envelope)
    padded.aperiodicity = surround(
        lambda: synth.silence_aperiodicity(
            frame_len, feature.fs, feature.spectrum_len),
        feature.aperiodicity)
    return padded
def test_trimmed_dataset():
    """Pad a feature with zero frames on both ends and check what
    ``TrimmedDataset`` removes.
    """
    def add_margin(data, margin_len=64):
        # Zero block with the same trailing dimension as ``data``.
        if len(data.shape) == 1:
            pad_shape = (margin_len, )
        else:
            pad_shape = (margin_len, data.shape[1])
        pad = np.zeros(pad_shape)
        return np.concatenate((pad, data, pad), axis=0)

    padded = kwiiyatta.feature(feature.get_analyzer(dataset.CLB_WAV))
    padded.f0 = add_margin(padded.f0)
    padded.spectrum_envelope = add_margin(padded.spectrum_envelope)
    padded.aperiodicity = add_margin(padded.aperiodicity)
    padded_len = len(padded.f0)

    trimmed_set = TrimmedDataset({'f': padded})
    trimmed_len = len(trimmed_set['f'].f0)

    assert trimmed_len == padded_len - 64
    # Both end frames of the trimmed data must be non-zero.
    assert np.abs(trimmed_set['f'].spectrum_envelope[0]).sum() > 0
    assert np.abs(trimmed_set['f'].spectrum_envelope[-1]).sum() > 0
def test_analyzer_feature(fs, frame_period):
    """Check that a feature built from an analyzer carries the same
    parameters and arrays as the analyzer.
    """
    from kwiiyatta.vocoder.world import WorldSynthesizer

    wav_path = dataset.get_wav_path(dataset.CLB_WAV, fs=fs)
    analyzer = kwiiyatta.analyze_wav(wav_path,
                                     frame_period=frame_period,
                                     mcep_order=36)
    assert analyzer.mel_cepstrum_order == 36
    assert analyzer.spectrum_len == WorldSynthesizer.fs_spectrum_len(
        analyzer.fs)
    frame_len = analyzer.frame_len

    copied = kwiiyatta.feature(analyzer)
    assert analyzer is not copied

    # Scalar parameters.
    assert analyzer.fs == copied.fs
    assert analyzer.frame_period == frame_period
    assert analyzer.frame_period == copied.frame_period
    assert analyzer.frame_len == copied.frame_len == frame_len
    assert analyzer.spectrum_len == copied.spectrum_len
    assert analyzer.mel_cepstrum_order == copied.mel_cepstrum_order

    # Arrays.
    assert feature.calc_diff(analyzer.f0, copied.f0) == 0
    spec_err = feature.calc_powered_diff(analyzer.spectrum_envelope,
                                         copied.spectrum_envelope)
    assert spec_err == 0
    assert feature.calc_diff(analyzer.aperiodicity,
                             copied.aperiodicity) == 0

    # The copy holds no mel-cepstrum data until it is requested.
    assert copied._mel_cepstrum.data is None
    mcep_err = feature.calc_diff(analyzer.mel_cepstrum.data,
                                 copied.mel_cepstrum.data)
    assert mcep_err == 0
def main():
    """Command-line entry point for voice resynthesis.

    Analyzes the ``source`` wav and synthesizes it again.  With ``--carrier``
    the source is first aligned to the carrier; then either the carrier's f0
    is used, or with ``--diffvc`` the carrier waveform is filtered with the
    mel-cepstrum difference.  The result is saved unless ``--no-save`` is
    given, and played when ``--play`` or ``--no-save`` is given.
    """
    conf = kwiiyatta.Config()
    conf.add_argument('source', type=str,
                      help='Source wav file of voice resynthesis')
    conf.add_argument('--result-dir', type=str,
                      help='Path to write result wav files')
    conf.add_argument('--mcep', action='store_true',
                      help='Use mel-cepstrum to resynthesize')
    conf.add_argument('--play', action='store_true',
                      help='Play result waveform')
    conf.add_argument('--no-save', action='store_true',
                      help='Not to write result wav file, and play waveform')
    conf.add_argument('--carrier', type=str,
                      help='Wav file to use for carrier')
    conf.add_argument('--diffvc', action='store_true',
                      help='Use difference MelCepstrum synthesis')
    conf.add_argument('--result-fs', type=int,
                      help='Result waveform sampling rate')
    conf.parse_args()

    # Without saving, playing is the only remaining output.
    conf.play |= conf.no_save

    source_path = pathlib.Path(conf.source).resolve()
    source = conf.create_analyzer(source_path,
                                  Analyzer=kwiiyatta.analyze_wav)

    if conf.result_dir is None:
        result_path = source_path.with_suffix('.resynth.wav')
    else:
        result_path = pathlib.Path(conf.result_dir) / source_path.name

    feature = kwiiyatta.feature(source)
    wav = None

    if conf.carrier is not None:
        carrier = conf.create_analyzer(conf.carrier,
                                       Analyzer=kwiiyatta.analyze_wav)
        feature = kwiiyatta.align(source, carrier)
        if conf.diffvc:
            mcep_diff = copy.copy(feature.mel_cepstrum)
            # copy.copy is shallow, so an in-place ``-=`` would also modify
            # the aligned feature's array; assign a new array instead.
            mcep_diff.data = mcep_diff.data - carrier.mel_cepstrum.data
            wav = kwiiyatta.apply_mlsa_filter(carrier.wavdata, mcep_diff)
        else:
            feature.f0 = carrier.f0

    if wav is None:
        if conf.mcep:
            # Drop the envelope so only the mel-cepstrum remains set.
            feature.extract_mel_cepstrum()
            feature.spectrum_envelope = None
        if conf.result_fs is not None:
            feature.resample(conf.result_fs)
        wav = feature.synthesize()

    if not conf.no_save:
        result_path.parent.mkdir(parents=True, exist_ok=True)
        wav.save(result_path)

    if conf.play:
        wav.play()
def test_resample_up(fs1, fs2, wavfile, frame_period):
    """Resample the lower-rate analysis up to the higher rate and compare it
    with a direct analysis at the higher rate.
    """
    np.random.seed(0)
    high_fs, low_fs = (fs2, fs1) if fs1 < fs2 else (fs1, fs2)
    high = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=high_fs), frame_period=frame_period)
    low = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=low_fs), frame_period=frame_period)

    low_up = kwiiyatta.resample(low, high_fs)
    # Clear the cached arrays on the source analyzer after resampling.
    low._spectrum_envelope = None
    low._aperiodicity = None
    low._mel_cepstrum.data = None

    assert high.fs == low_up.fs
    assert feature.calc_diff(low_up.f0, low.f0) == 0
    assert_any.between(
        1.7e-8,
        feature.calc_powered_diff(
            low_up.spectrum_envelope,
            low.resample_spectrum_envelope(low_up.fs)),
        1.4e-7)
    exact_ape = feature.calc_diff(
        low_up.aperiodicity,
        low.resample_aperiodicity(low_up.fs))
    assert exact_ape == 0
    assert_any.between(
        0.0009,
        feature.calc_diff(
            low_up.mel_cepstrum.data,
            low.resample_mel_cepstrum(low_up.fs).data),
        0.004, sig_dig=1)
    assert low.mel_cepstrum.order == low_up.mel_cepstrum.order

    # Resampled feature versus the direct high-rate analysis.
    f0_err, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(high, low_up)
    assert_any.between(0.0012, f0_err, 0.014)
    assert_any.between(0.0015, spec_err, 0.0068)
    assert_any.between(0.039, ape_err, 0.20)
    assert_any.between(0.10, mcep_err, 0.36)

    # Synthesize the resampled feature and analyze it again.
    resynth_wav = low_up.synthesize()
    resynth = kwiiyatta.Analyzer(resynth_wav, frame_period=frame_period)
    f0_err, spec_err, ape_err, mcep_err = \
        feature.calc_feature_diffs(high, resynth)
    assert_any.between(0.050, f0_err, 0.11)
    assert_any.between(0.20, spec_err, 0.23)
    assert_any.between(0.065, ape_err, 0.32)
    assert_any.between(0.047, mcep_err, 0.16)

    # Feature that keeps only its mel-cepstrum.
    low_feature = kwiiyatta.feature(low)
    low_feature.extract_mel_cepstrum()
    low_feature.spectrum_envelope = None
    feature_mcep_up = low_feature.resample_mel_cepstrum(high.fs)
    mcep_err = feature.calc_diff(high.mel_cepstrum.data,
                                 feature_mcep_up.data)
    assert_any.between(0.10, mcep_err, 0.36)

    analyzer_mcep_up = kwiiyatta.resample(low.mel_cepstrum, high.fs)
    assert_any.between(
        0.0009,
        feature.calc_diff(analyzer_mcep_up.data, feature_mcep_up.data),
        0.004, sig_dig=1)

    # Compare only the leading bins, scaled by the ratio of the two rates.
    shared_bins = high.spectrum_envelope.shape[1] * low_fs // high.fs
    spec_err = feature.calc_powered_diff(
        high.spectrum_envelope[:, :shared_bins],
        low_up.spectrum_envelope[:, :shared_bins])
    assert_any.between(0.00012, spec_err, 0.55)
def test_set_Analyzer_param():
    """Check which private analyzer fields are populated by each public
    accessor, and how a feature built from the analyzer shares its arrays.

    The assertions depend on the exact order of attribute accesses.
    """
    # A fresh analyzer has none of its private fields populated.
    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    assert analyzer._f0 is None
    assert analyzer._spectrum_envelope is None
    assert analyzer._mel_cepstrum.data is None
    assert analyzer._aperiodicity is None
    assert analyzer._is_voiced is None

    # Reading aperiodicity populates _f0 and _aperiodicity only.
    _ = analyzer.aperiodicity
    assert analyzer._f0 is not None
    assert analyzer._spectrum_envelope is None
    assert analyzer._mel_cepstrum.data is None
    assert analyzer._aperiodicity is not None
    assert analyzer._is_voiced is None

    # Reading mel_cepstrum populates the envelope and the mel-cepstrum data
    # and leaves the cleared aperiodicity unset.
    analyzer._aperiodicity = None
    _ = analyzer.mel_cepstrum
    assert analyzer._f0 is not None
    assert analyzer._spectrum_envelope is not None
    assert analyzer._mel_cepstrum.data is not None
    assert analyzer._aperiodicity is None
    assert analyzer._is_voiced is None

    # Reading is_voiced populates _f0, _aperiodicity and _is_voiced.
    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    _ = analyzer.is_voiced
    assert analyzer._f0 is not None
    assert analyzer._spectrum_envelope is None
    assert analyzer._mel_cepstrum.data is None
    assert analyzer._aperiodicity is not None
    assert analyzer._is_voiced is not None

    # A feature built from the analyzer is a distinct object whose arrays
    # are the very same objects as the analyzer's.
    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    _ = analyzer.mel_cepstrum
    feature = kwiiyatta.feature(analyzer)
    assert analyzer is not feature
    assert analyzer.mel_cepstrum_order == feature.mel_cepstrum_order
    assert analyzer._f0 is not None
    assert analyzer._f0 is feature.f0
    assert analyzer._spectrum_envelope is not None
    assert (analyzer._spectrum_envelope
            is feature._spectrum_envelope)
    assert analyzer.mel_cepstrum_order == feature.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert (analyzer._mel_cepstrum.data
            is feature._mel_cepstrum.data)
    assert analyzer._aperiodicity is not None
    assert analyzer._aperiodicity is feature.aperiodicity
    assert analyzer._is_voiced is None
    assert (analyzer.is_voiced == feature.is_voiced).all()

    # A strided slice yields non-contiguous arrays; ascontiguousarray()
    # makes them contiguous while the feature stays equal to its copy.
    feature = feature[::2]
    f = copy.copy(feature)
    assert not feature.f0.flags['C_CONTIGUOUS']
    assert not feature.spectrum_envelope.flags['C_CONTIGUOUS']
    assert not feature.aperiodicity.flags['C_CONTIGUOUS']
    feature.ascontiguousarray()
    assert feature.f0.flags['C_CONTIGUOUS']
    assert feature.spectrum_envelope.flags['C_CONTIGUOUS']
    assert feature.aperiodicity.flags['C_CONTIGUOUS']
    assert feature is not f
    assert feature == f

    # With a doubled mcep_order the feature has its own mel-cepstrum data,
    # whose leading columns equal the analyzer's.
    feature = kwiiyatta.feature(analyzer,
                                mcep_order=analyzer.mel_cepstrum_order * 2)
    assert analyzer is not feature
    assert analyzer.mel_cepstrum_order != feature.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert (analyzer._mel_cepstrum.data
            is not feature.mel_cepstrum.data)
    assert ((analyzer._mel_cepstrum.data
             == feature.mel_cepstrum.data[:, :analyzer.mel_cepstrum_order + 1]).all())

    # Setting the order back gives equal values but still separate arrays.
    feature.mel_cepstrum_order = analyzer.mel_cepstrum_order
    assert analyzer is not feature
    assert analyzer.mel_cepstrum_order == feature.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert (analyzer._mel_cepstrum.data
            is not feature.mel_cepstrum.data)
    assert (analyzer._mel_cepstrum.data == feature.mel_cepstrum.data).all()

    # Clearing attributes on the feature must not affect the analyzer.
    feature.f0 = None
    assert analyzer.f0 is not None
    feature.spectrum_envelope = None
    assert analyzer.spectrum_envelope is not None