def test_mlsa_filter(fs1, fs2):
    """Apply an MLSA filter built from an SLT-minus-CLB mel-cepstrum
    difference to the CLB waveform and compare the re-analyzed result with
    an expected feature assembled from the aligned SLT spectrum envelope.

    ``fs1`` is the sampling rate requested for the CLB wav and ``fs2`` the
    one requested for the SLT wav.
    """
    np.random.seed(0)
    source = feature.get_analyzer(
        dataset.get_wav_path(dataset.CLB_WAV, fs=fs1))
    target = feature.get_analyzer(
        dataset.get_wav_path(dataset.SLT_WAV, fs=fs2))
    target_aligned = kwiiyatta.align(target, source)

    # Shallow-copy the aligned mel-cepstrum object and replace its data with
    # the difference against the source mel-cepstrum data resampled to the
    # target's sampling rate.
    mcep_delta = copy.copy(target_aligned.mel_cepstrum)
    aligned_data = mcep_delta.data
    source_data = source.mel_cepstrum.resample_data(target.fs)
    mcep_delta.data = aligned_data - source_data

    filtered = kwiiyatta.apply_mlsa_filter(source.wavdata, mcep_delta)

    expected = kwiiyatta.feature(source)
    if source.fs <= target.fs:
        expected.spectrum_envelope = \
            target_aligned.resample_spectrum_envelope(source.fs)
        # NOTE(review): the call below is kept inside this branch because
        # the other branch already goes through feature.override_power;
        # confirm against the original layout of this test.
        feature.override_spectrum_power(expected, source)
    else:
        # The source has the higher rate: only the first `band_len` bins
        # come from the aligned target, the rest are the source's own bins.
        band_len = source.spectrum_len * target.fs // source.fs
        low_band = feature.override_power(
            target_aligned.reshaped_spectrum_envelope(band_len),
            source.spectrum_envelope[:, :band_len]
        )
        high_band = source.spectrum_envelope[:, band_len:]
        expected.spectrum_envelope = np.hstack((low_band, high_band))

    actual = kwiiyatta.Analyzer(filtered)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(expected, actual)
    assert_any.between(0.051, f0_diff, 0.078)
    assert_any.between(0.32, spec_diff, 0.55)
    assert_any.between(0.039, ape_diff, 0.073)
    assert_any.between(0.038, mcep_diff, 0.088)
def make_expected_feature(wavpath, fs=16000, fullset=False):
    """Build the reference feature used to judge a converted utterance.

    The CLB and SLT recordings named ``wavpath`` are analyzed, SLT is aligned
    to CLB, and a feature created from the CLB analyzer gets the aligned SLT
    spectrum envelope assigned before being passed through
    ``feature.override_spectrum_power`` together with the CLB analyzer.

    ``fs`` and ``fullset`` are forwarded to ``dataset.get_wav_path``.
    """
    def analyze(speaker_dir):
        # Same lookup for both speakers; only the dataset directory differs.
        wav = dataset.get_wav_path(speaker_dir / wavpath, fullset, fs=fs)
        return feature.get_analyzer(wav)

    source = analyze(dataset.CLB_DIR)
    target = analyze(dataset.SLT_DIR)
    target_aligned = kwiiyatta.align(target, source)

    reference = kwiiyatta.feature(source)
    reference.spectrum_envelope = target_aligned.spectrum_envelope
    feature.override_spectrum_power(reference, source)
    return reference
def test_voice_conversion_fullset(tmpdir):
    """Run ``cv.main()`` on the full CLB/SLT datasets and check the outputs.

    ``sys.argv`` is replaced with the conversion arguments only for the
    duration of ``cv.main()`` and restored afterwards, so this test does not
    leak its command line into tests that run later in the same process.

    For each of the three converted utterances both the ``.diff.wav`` and
    the ``.synth.wav`` result must exist, and its re-analyzed features must
    fall within the expected distance of ``make_expected_feature``.
    """
    result_root = pathlib.Path(tmpdir)
    names = ['arctic_a0001', 'arctic_a0002', 'arctic_a0003']

    argv = [
        sys.argv[0],
        '--source',
        str(dataset.get_dataset_path(dataset.CLB_DIR, fullset=True)),
        '--target',
        str(dataset.get_dataset_path(dataset.SLT_DIR, fullset=True)),
        '--result-dir', str(result_root),
        '--converter-seed', '0',
        '--max-files', '100',
        '--skip-files', '3',
    ]
    argv.extend(
        str(dataset.get_wav_path(dataset.CLB_DIR / (name + '.wav'),
                                 fullset=True))
        for name in names
    )

    saved_argv = sys.argv
    sys.argv = argv
    try:
        np.random.seed(0)
        cv.main()
    finally:
        # Always put the original argument list back, even if main() fails.
        sys.argv = saved_argv

    # (suffix, f0 range, spectrum range, aperiodicity range, mcep range)
    expected_ranges = (
        ('.diff.wav',
         (0.068, 0.12), (0.49, 0.59), (0.050, 0.074), (0.10, 0.11)),
        ('.synth.wav',
         (0.062, 0.075), (0.48, 0.56), (0.10, 0.13), (0.10, 0.11)),
    )
    # All .diff.wav results are checked first, then all .synth.wav results.
    for suffix, f0_range, spec_range, ape_range, mcep_range \
            in expected_ranges:
        for name in names:
            result_path = (result_root / name).with_suffix(suffix)
            assert result_path.is_file()
            expected = make_expected_feature(name + '.wav', fullset=True)
            actual = kwiiyatta.analyze_wav(result_path)
            f0_diff, spec_diff, ape_diff, mcep_diff = \
                feature.calc_feature_diffs(expected, actual)
            assert_any.between(f0_range[0], f0_diff, f0_range[1])
            assert_any.between(spec_range[0], spec_diff, spec_range[1])
            assert_any.between(ape_range[0], ape_diff, ape_range[1])
            assert_any.between(mcep_range[0], mcep_diff, mcep_range[1])
def test_resample_down(fs1, fs2, wavfile, frame_period):
    """Resample an analyzer from the higher of the two rates down to the
    lower one and compare it with an analyzer of the lower-rate wav.

    ``fs1`` and ``fs2`` may be given in either order; the smaller one is
    used as the resampling destination.
    """
    low_fs, high_fs = (fs2, fs1) if fs2 < fs1 else (fs1, fs2)

    reference = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=low_fs),
        frame_period=frame_period)
    original = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=high_fs),
        frame_period=frame_period)

    resampled = kwiiyatta.resample(original, low_fs)

    # Reset the private feature attributes of the high-rate analyzer after
    # resampling (presumably so that they are recomputed on next access --
    # confirm against the Analyzer implementation).
    original._spectrum_envelope = None
    original._aperiodicity = None
    original._mel_cepstrum.data = None

    assert reference.fs == resampled.fs

    # The resampled analyzer must agree exactly with the per-feature
    # resample methods of the original analyzer.
    mcep_mismatch = feature.calc_diff(
        resampled.mel_cepstrum.data,
        original.resample_mel_cepstrum(resampled.fs).data)
    assert mcep_mismatch == 0
    assert feature.calc_diff(resampled.f0, original.f0) == 0
    spec_mismatch = feature.calc_powered_diff(
        resampled.spectrum_envelope,
        original.resample_spectrum_envelope(resampled.fs))
    assert spec_mismatch == 0
    ape_mismatch = feature.calc_diff(
        resampled.aperiodicity,
        original.resample_aperiodicity(resampled.fs))
    assert ape_mismatch == 0
    assert original.mel_cepstrum.order == resampled.mel_cepstrum.order

    # Resampled features versus features analyzed at the low rate.
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, resampled)
    assert_any.between(0.0012, f0_diff, 0.014)
    assert_any.between(0.0025, spec_diff, 0.0094)
    assert_any.between(0.0015, ape_diff, 0.048)
    assert_any.between(0.011, mcep_diff, 0.031)

    # Synthesize from the resampled features and analyze the result again.
    resampled_wav = resampled.synthesize()
    reanalyzed = kwiiyatta.Analyzer(resampled_wav,
                                    frame_period=frame_period)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, reanalyzed)
    assert_any.between(0.055, f0_diff, 0.11)
    assert_any.between(0.20, spec_diff, 0.23)
    assert_any.between(0.072, ape_diff, 0.10)
    assert_any.between(0.038, mcep_diff, 0.056)

    # Resample the mel-cepstrum of a feature whose spectrum envelope has
    # been set to None after the mel-cepstrum was extracted.
    high_feature = kwiiyatta.feature(original)
    high_feature.extract_mel_cepstrum()
    high_feature.spectrum_envelope = None
    feature_mcep = high_feature.resample_mel_cepstrum(reference.fs)
    mcep_diff = feature.calc_diff(reference.mel_cepstrum.data,
                                  feature_mcep.data)
    assert_any.between(0.014, mcep_diff, 0.041)

    # Resampling the mel-cepstrum object directly must give the same data.
    analyzer_mcep = kwiiyatta.resample(original.mel_cepstrum, reference.fs)
    mcep_diff = feature.calc_diff(analyzer_mcep.data, feature_mcep.data)
    assert mcep_diff == 0
def setup_fs_dataset(src_dir):
    """Copy CLB utterances ``arctic_a0001`` .. ``arctic_a0008`` to ``src_dir``.

    Sampling rates are taken in order from ``dataset.FS``; utterances beyond
    the end of that sequence use 16000.
    """
    sampling_rates = itertools.islice(
        itertools.chain(dataset.FS, itertools.repeat(16000)), 8)
    for num, fs in enumerate(sampling_rates, start=1):
        wav_name = f'arctic_a{num:04}.wav'
        wav_path = dataset.get_wav_path(dataset.CLB_DIR / wav_name, fs=fs)
        shutil.copy(wav_path, src_dir)
def setup_dtype_dataset(src_dir):
    """Copy CLB utterances ``arctic_a0001`` .. ``arctic_a0008`` to ``src_dir``.

    Sample dtypes are taken in order from ``dataset.DTYPES``; utterances
    beyond the end of that sequence use ``'i16'``.
    """
    dtypes = itertools.islice(
        itertools.chain(dataset.DTYPES, itertools.repeat('i16')), 8)
    for num, dtype in enumerate(dtypes, start=1):
        wav_name = f'arctic_a{num:04}.wav'
        wav_path = dataset.get_wav_path(dataset.CLB_DIR / wav_name,
                                        dtype=dtype)
        shutil.copy(wav_path, src_dir)
def test_voice_conversion(tmpdir, setup_func, target_fs, test_fs):
    """Run ``cv.main()`` on a prepared source directory and check the outputs.

    ``setup_func`` is called with a fresh ``src`` directory under ``tmpdir``
    to populate the source dataset. ``target_fs`` selects the SLT dataset
    path and ``test_fs`` the sampling rate of the utterance to convert.

    ``sys.argv`` is replaced with the conversion arguments only for the
    duration of ``cv.main()`` and restored afterwards, so this test does not
    leak its command line into tests that run later in the same process.
    """
    tmp_path = pathlib.Path(tmpdir)
    result_root = tmp_path / 'result'
    src_dir = tmp_path / 'src'
    src_dir.mkdir(exist_ok=True)
    setup_func(src_dir)

    argv = [
        sys.argv[0],
        '--source', str(src_dir),
        '--target',
        str(dataset.get_dataset_path(dataset.SLT_DIR, fs=target_fs)),
        '--result-dir', str(result_root),
        '--converter-seed', '0',
        '--converter-components', '1',
        '--max-files', '8',
        str(dataset.get_wav_path(dataset.CLB_DIR/'arctic_a0009.wav',
                                 fs=test_fs)),
    ]

    saved_argv = sys.argv
    sys.argv = argv
    try:
        np.random.seed(0)
        cv.main()
    finally:
        # Always put the original argument list back, even if main() fails.
        sys.argv = saved_argv

    diff_path = result_root / 'arctic_a0009.diff.wav'
    synth_path = result_root / 'arctic_a0009.synth.wav'
    assert diff_path.is_file()
    assert synth_path.is_file()

    expected = make_expected_feature('arctic_a0009.wav', fs=test_fs)

    act_diff = kwiiyatta.analyze_wav(diff_path)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(expected, act_diff)
    assert_any.between(0.057, f0_diff, 0.092)
    assert_any.between(0.46, spec_diff, 0.54)
    assert_any.between(0.042, ape_diff, 0.049)
    assert_any.between(0.077, mcep_diff, 0.11)

    act_synth = kwiiyatta.analyze_wav(synth_path)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(expected, act_synth)
    assert_any.between(0.10, f0_diff, 0.12)
    assert_any.between(0.47, spec_diff, 0.52)
    assert_any.between(0.073, ape_diff, 0.095)
    assert_any.between(0.078, mcep_diff, 0.11)
def test_reanalyze(wavfile, dtype, fs, frame_period):
    """Synthesize a wav from an analyzer, analyze it again and compare.

    Also checks that synthesizing from the analyzer and from a feature
    created from that analyzer gives identical wav data.
    """
    wav_path = dataset.get_wav_path(wavfile, dtype=dtype, fs=fs)
    original = feature.get_analyzer(wav_path, frame_period=frame_period)
    assert original.fs == fs

    analyzer_wav = original.synthesize()
    feature_wav = kwiiyatta.feature(original).synthesize()
    assert analyzer_wav.fs == feature_wav.fs
    assert (analyzer_wav.data == feature_wav.data).all()

    reanalyzed = kwiiyatta.Analyzer(analyzer_wav,
                                    frame_period=frame_period)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(original, reanalyzed)
    assert_any.between(0.052, f0_diff, 0.094)
    assert_any.between(0.20, spec_diff, 0.22)
    assert_any.between(0.063, ape_diff, 0.096)
    assert_any.between(0.030, mcep_diff, 0.055)
def test_analyzer_feature(fs, frame_period):
    """Check that a feature created from an analyzer mirrors the analyzer.

    The analyzer is built with ``mcep_order=36``; the feature must be a
    distinct object with the same sampling rate, frame period, frame length,
    spectrum length, mel-cepstrum order, F0, spectrum envelope and
    aperiodicity.
    """
    from kwiiyatta.vocoder.world import WorldSynthesizer

    wav_path = dataset.get_wav_path(dataset.CLB_WAV, fs=fs)
    analyzer = kwiiyatta.analyze_wav(wav_path,
                                     frame_period=frame_period,
                                     mcep_order=36)

    assert analyzer.mel_cepstrum_order == 36
    assert (analyzer.spectrum_len
            == WorldSynthesizer.fs_spectrum_len(analyzer.fs))

    # Remember the frame length before the feature is created.
    frame_len = analyzer.frame_len

    feat = kwiiyatta.feature(analyzer)

    assert analyzer is not feat
    assert analyzer.fs == feat.fs
    assert analyzer.frame_period == frame_period
    assert analyzer.frame_period == feat.frame_period
    assert analyzer.frame_len == feat.frame_len == frame_len
    assert analyzer.spectrum_len == feat.spectrum_len
    assert analyzer.mel_cepstrum_order == feat.mel_cepstrum_order

    assert feature.calc_diff(analyzer.f0, feat.f0) == 0
    envelope_diff = feature.calc_powered_diff(analyzer.spectrum_envelope,
                                              feat.spectrum_envelope)
    assert envelope_diff == 0
    assert feature.calc_diff(analyzer.aperiodicity, feat.aperiodicity) == 0

    # The feature's mel-cepstrum data must still be unset at this point,
    # i.e. before `feat.mel_cepstrum` is accessed for the first time below.
    assert feat._mel_cepstrum.data is None
    mcep_mismatch = feature.calc_diff(analyzer.mel_cepstrum.data,
                                      feat.mel_cepstrum.data)
    assert mcep_mismatch == 0
def setup_44_dataset(src_dir):
    """Copy CLB utterances ``arctic_a0001`` .. ``arctic_a0008`` to ``src_dir``,
    each requested with ``fs=44100``.
    """
    for num in range(1, 9):
        wav_name = f'arctic_a{num:04}.wav'
        wav_path = dataset.get_wav_path(dataset.CLB_DIR / wav_name,
                                        fs=44100)
        shutil.copy(wav_path, src_dir)
def test_resample_up(fs1, fs2, wavfile, frame_period):
    """Resample an analyzer from the lower of the two rates up to the higher
    one and compare it with an analyzer of the higher-rate wav.

    ``fs1`` and ``fs2`` may be given in either order; the larger one is used
    as the resampling destination.
    """
    np.random.seed(0)
    high_fs, low_fs = (fs2, fs1) if fs1 < fs2 else (fs1, fs2)

    reference = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=high_fs),
        frame_period=frame_period)
    original = feature.get_analyzer(
        dataset.get_wav_path(wavfile, fs=low_fs),
        frame_period=frame_period)

    resampled = kwiiyatta.resample(original, high_fs)

    # Reset the private feature attributes of the low-rate analyzer after
    # resampling (presumably so that they are recomputed on next access --
    # confirm against the Analyzer implementation).
    original._spectrum_envelope = None
    original._aperiodicity = None
    original._mel_cepstrum.data = None

    assert reference.fs == resampled.fs

    # Compare the resampled analyzer with the per-feature resample methods
    # of the original analyzer. F0 and aperiodicity must match exactly; the
    # spectrum envelope and mel-cepstrum only within a small range.
    assert feature.calc_diff(resampled.f0, original.f0) == 0
    envelope_mismatch = feature.calc_powered_diff(
        resampled.spectrum_envelope,
        original.resample_spectrum_envelope(resampled.fs))
    assert_any.between(1.7e-8, envelope_mismatch, 1.4e-7)
    ape_mismatch = feature.calc_diff(
        resampled.aperiodicity,
        original.resample_aperiodicity(resampled.fs))
    assert ape_mismatch == 0
    mcep_mismatch = feature.calc_diff(
        resampled.mel_cepstrum.data,
        original.resample_mel_cepstrum(resampled.fs).data)
    assert_any.between(0.0009, mcep_mismatch, 0.004, sig_dig=1)
    assert original.mel_cepstrum.order == resampled.mel_cepstrum.order

    # Resampled features versus features analyzed at the high rate.
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, resampled)
    assert_any.between(0.0012, f0_diff, 0.014)
    assert_any.between(0.0015, spec_diff, 0.0068)
    assert_any.between(0.039, ape_diff, 0.20)
    assert_any.between(0.10, mcep_diff, 0.36)

    # Synthesize from the resampled features and analyze the result again.
    resampled_wav = resampled.synthesize()
    reanalyzed = kwiiyatta.Analyzer(resampled_wav,
                                    frame_period=frame_period)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, reanalyzed)
    assert_any.between(0.050, f0_diff, 0.11)
    assert_any.between(0.20, spec_diff, 0.23)
    assert_any.between(0.065, ape_diff, 0.32)
    assert_any.between(0.047, mcep_diff, 0.16)

    # Resample the mel-cepstrum of a feature whose spectrum envelope has
    # been set to None after the mel-cepstrum was extracted.
    low_feature = kwiiyatta.feature(original)
    low_feature.extract_mel_cepstrum()
    low_feature.spectrum_envelope = None
    feature_mcep = low_feature.resample_mel_cepstrum(reference.fs)
    mcep_diff = feature.calc_diff(reference.mel_cepstrum.data,
                                  feature_mcep.data)
    assert_any.between(0.10, mcep_diff, 0.36)

    # Resampling the mel-cepstrum object directly, compared with the
    # feature-based resampling above.
    analyzer_mcep = kwiiyatta.resample(original.mel_cepstrum, reference.fs)
    direct_mismatch = feature.calc_diff(analyzer_mcep.data,
                                        feature_mcep.data)
    assert_any.between(0.0009, direct_mismatch, 0.004, sig_dig=1)

    # Compare only the leading spectrum bins, their count scaled by the
    # ratio of the low rate to the reference rate.
    low_band_len = \
        reference.spectrum_envelope.shape[1] * low_fs // reference.fs
    spec_diff = feature.calc_powered_diff(
        reference.spectrum_envelope[:, :low_band_len],
        resampled.spectrum_envelope[:, :low_band_len])
    assert_any.between(0.00012, spec_diff, 0.55)