def test_voice_conversion_fullset(tmpdir):
    """Run the conversion CLI on the full dataset and validate its outputs.

    ``cv.main()`` is driven through ``sys.argv``.  For each of the three
    requested utterances both a ``.diff.wav`` and a ``.synth.wav`` file must
    exist in the result directory, and the feature differences against
    ``make_expected_feature`` must fall inside the recorded ranges.
    """
    out_dir = pathlib.Path(tmpdir)

    argv = [sys.argv[0]]
    argv += [
        '--source',
        str(dataset.get_dataset_path(dataset.CLB_DIR, fullset=True)),
    ]
    argv += [
        '--target',
        str(dataset.get_dataset_path(dataset.SLT_DIR, fullset=True)),
    ]
    argv += ['--result-dir', str(out_dir)]
    argv += ['--converter-seed', '0']
    argv += ['--max-files', '100']
    argv += ['--skip-files', '3']
    argv += [
        str(dataset.get_wav_path(dataset.CLB_DIR/'arctic_a0001.wav',
                                 fullset=True)),
        str(dataset.get_wav_path(dataset.CLB_DIR/'arctic_a0002.wav',
                                 fullset=True)),
        str(dataset.get_wav_path(dataset.CLB_DIR/'arctic_a0003.wav',
                                 fullset=True)),
    ]
    sys.argv = argv

    # Seed numpy's global RNG before running the CLI entry point.
    np.random.seed(0)
    cv.main()

    stems = ['arctic_a0001', 'arctic_a0002', 'arctic_a0003']

    # (output suffix, (low, high) bounds for f0 / spectrum / aperiodicity /
    # mel-cepstrum differences).  All ``.diff.wav`` files are checked before
    # any ``.synth.wav`` file.
    cases = (
        ('.diff.wav',
         ((0.068, 0.12), (0.49, 0.59), (0.050, 0.074), (0.10, 0.11))),
        ('.synth.wav',
         ((0.062, 0.075), (0.48, 0.56), (0.10, 0.13), (0.10, 0.11))),
    )
    for suffix, bounds in cases:
        for stem in stems:
            wav_path = (out_dir / stem).with_suffix(suffix)
            assert wav_path.is_file()

            expected = make_expected_feature(stem + '.wav', fullset=True)
            actual = kwiiyatta.analyze_wav(wav_path)
            f0_diff, spec_diff, ape_diff, mcep_diff = \
                feature.calc_feature_diffs(expected, actual)

            measured = (f0_diff, spec_diff, ape_diff, mcep_diff)
            for (low, high), value in zip(bounds, measured):
                assert_any.between(low, value, high)
def test_voice_resynthesis_diffvc(check, tmpdir):
    """Resynthesize CLB with an SLT carrier in ``--diffvc`` mode.

    The expected feature is CLB aligned to SLT, with f0, spectrum power and
    aperiodicity taken from SLT and the mel-cepstrum reset to ``None``.
    """
    out_dir = pathlib.Path(tmpdir)

    cli_args = [sys.argv[0], str(dataset.CLB_WAV)]
    cli_args += ["--result-dir", str(out_dir)]
    cli_args += ["--mcep", "--mcep-order", "48"]
    cli_args += ["--carrier", str(dataset.SLT_WAV)]
    cli_args += ["--diffvc"]
    sys.argv = cli_args
    rv.main()

    produced = out_dir / 'arctic_a0001.wav'
    assert produced.is_file()

    source = feature.get_analyzer(dataset.CLB_WAV)
    carrier = feature.get_analyzer(dataset.SLT_WAV)

    # Build the reference feature from the aligned source, then overwrite
    # the parts that are expected to come from the carrier.
    reference = kwiiyatta.align(source, carrier)
    reference.f0 = carrier.f0
    feature.override_spectrum_power(reference, carrier)
    reference.aperiodicity = carrier.aperiodicity
    reference.mel_cepstrum = None

    analyzed = kwiiyatta.analyze_wav(produced)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, analyzed)

    for wanted, got in (
        (0.10, f0_diff),
        (0.36, spec_diff),
        (0.076, ape_diff),
        (0.081, mcep_diff),
    ):
        check.round_equal(wanted, got)
def test_voice_resynthesis_carrier(check, tmpdir):
    """Resynthesize CLB with an SLT carrier (no ``--diffvc``).

    The expected feature is CLB aligned to SLT with only f0 replaced by the
    carrier's f0.
    """
    out_dir = pathlib.Path(tmpdir)

    cli_args = [sys.argv[0], str(dataset.CLB_WAV)]
    cli_args += ["--result-dir", str(out_dir)]
    cli_args += ["--mcep", "--mcep-order", "48"]
    cli_args += ["--carrier", str(dataset.SLT_WAV)]
    sys.argv = cli_args
    rv.main()

    produced = out_dir / 'arctic_a0001.wav'
    assert produced.is_file()

    source = feature.get_analyzer(dataset.CLB_WAV)
    carrier = feature.get_analyzer(dataset.SLT_WAV)
    reference = kwiiyatta.align(source, carrier)
    reference.f0 = carrier.f0

    analyzed = kwiiyatta.analyze_wav(produced)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, analyzed)

    for wanted, got in (
        (0.093, f0_diff),
        (0.23, spec_diff),
        (0.093, ape_diff),
        (0.060, mcep_diff),
    ):
        check.round_equal(wanted, got)
def test_mcep_to_spec(wavfile, mcep_order):
    """Check that a spectrum rebuilt from the mel-cepstrum stays close.

    The mel-cepstrum must have ``mcep_order + 1`` coefficients on its last
    axis, and the spectrum extracted from it is compared to the analyzer's
    own spectrum envelope.
    """
    analyzer = kwiiyatta.analyze_wav(wavfile, mcep_order=mcep_order)
    mel_cepstrum = analyzer.mel_cepstrum
    assert mel_cepstrum.data.shape[-1] == mcep_order + 1

    original_spec = analyzer.spectrum_envelope
    rebuilt_spec = mel_cepstrum.extract_spectrum(analyzer.spectrum_len)
    diff = feature.calc_powered_diff(original_spec, rebuilt_spec)
    assert_any.between(0.021, diff, 0.091)
def test_voice_conversion(tmpdir, setup_func, target_fs, test_fs):
    """Run the conversion CLI on a prepared source directory.

    ``setup_func`` populates the source directory.  After ``cv.main()`` both
    the ``.diff.wav`` and ``.synth.wav`` outputs for ``arctic_a0009`` must
    exist, and their feature differences against the expected feature must
    fall inside the recorded ranges.
    """
    base = pathlib.Path(tmpdir)
    out_dir = base / 'result'
    source_dir = base / 'src'
    source_dir.mkdir(exist_ok=True)
    setup_func(source_dir)

    sys.argv = [
        sys.argv[0],
        '--source', str(source_dir),
        '--target',
        str(dataset.get_dataset_path(dataset.SLT_DIR, fs=target_fs)),
        '--result-dir', str(out_dir),
        '--converter-seed', '0',
        '--converter-components', '1',
        '--max-files', '8',
        str(dataset.get_wav_path(dataset.CLB_DIR/'arctic_a0009.wav',
                                 fs=test_fs)),
    ]

    # Seed numpy's global RNG before running the CLI entry point.
    np.random.seed(0)
    cv.main()

    diff_wav = out_dir / 'arctic_a0009.diff.wav'
    synth_wav = out_dir / 'arctic_a0009.synth.wav'
    assert diff_wav.is_file()
    assert synth_wav.is_file()

    expected = make_expected_feature('arctic_a0009.wav', fs=test_fs)

    # (output file, (low, high) bounds for f0 / spectrum / aperiodicity /
    # mel-cepstrum differences).
    cases = (
        (diff_wav,
         ((0.057, 0.092), (0.46, 0.54), (0.042, 0.049), (0.077, 0.11))),
        (synth_wav,
         ((0.10, 0.12), (0.47, 0.52), (0.073, 0.095), (0.078, 0.11))),
    )
    for wav_path, bounds in cases:
        actual = kwiiyatta.analyze_wav(wav_path)
        f0_diff, spec_diff, ape_diff, mcep_diff = \
            feature.calc_feature_diffs(expected, actual)

        measured = (f0_diff, spec_diff, ape_diff, mcep_diff)
        for (low, high), value in zip(bounds, measured):
            assert_any.between(low, value, high)
def test_dataset():
    """Check ``WavFileDataset`` keys/items and ``MelCepstrumDataset`` items.

    The CLB dataset must expose exactly ``arctic_a0001.wav`` to
    ``arctic_a0009.wav``, each item must match a direct ``analyze_wav`` of
    the same file, and the mel-cepstrum dataset item must equal the analyzed
    mel-cepstrum with its first coefficient column dropped.
    """
    wav_dataset = kwiiyatta.WavFileDataset(dataset.CLB_DIR)

    wanted_keys = set()
    for num in range(1, 10):
        wanted_keys.add(pathlib.Path(f'arctic_a{num:04}.wav'))
    assert wanted_keys == wav_dataset.keys()

    for name in wanted_keys:
        direct = kwiiyatta.analyze_wav(dataset.CLB_DIR / name)
        loaded = wav_dataset[name]
        assert (direct.f0 == loaded.f0).all()
        assert (direct.spectrum_envelope == loaded.spectrum_envelope).all()
        assert (direct.aperiodicity == loaded.aperiodicity).all()
        assert (direct.mel_cepstrum.data == loaded.mel_cepstrum.data).all()

    mcep_dataset = MelCepstrumDataset(wav_dataset)
    full_mcep = kwiiyatta.analyze_wav(dataset.CLB_WAV).mel_cepstrum.data
    # Column 0 is excluded from the comparison.
    without_first = full_mcep[:, 1:]
    assert (without_first == mcep_dataset['arctic_a0001.wav']).all()
def test_voice_resynthesis(tmpdir, check):
    """Resynthesize CLB with default options and compare to the original.

    The output wav is re-analyzed and its feature differences against the
    analysis of the input wav are checked with ``check.round_equal``.
    """
    out_dir = pathlib.Path(tmpdir)

    cli_args = [sys.argv[0]]
    cli_args += ["--result-dir", str(out_dir)]
    cli_args.append(str(dataset.CLB_WAV))
    sys.argv = cli_args
    rv.main()

    produced = out_dir / 'arctic_a0001.wav'
    assert produced.is_file()

    reference = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    analyzed = kwiiyatta.analyze_wav(produced)
    f0_diff, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(reference, analyzed)

    for wanted, got in (
        (0.079, f0_diff),
        (0.20, spec_diff),
        (0.073, ape_diff),
        (0.054, mcep_diff),
    ):
        check.round_equal(wanted, got)
def test_reshape(check):
    """Check reshaping of spectrum envelope / aperiodicity between lengths.

    A second analyzer is extracted with an FFT size derived from the first
    analyzer's spectrum length so the two spectrum lengths differ; the test
    then reshapes in both directions and checks the recorded differences.
    """
    from kwiiyatta.vocoder.world import WorldAnalyzer

    base = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    wide = WorldAnalyzer.load_wav(dataset.CLB_WAV)

    wide_fft_size = (base.spectrum_len - 1) * 2 * 2
    wide.extract_spectrum_envelope(fft_size=wide_fft_size)
    wide.extract_aperiodicity(fft_size=wide_fft_size)
    assert base.spectrum_len != wide.spectrum_len

    # Reshape the second analyzer to the first one's spectrum length.
    wide_reshaped = kwiiyatta.reshape(wide, base.spectrum_len)
    assert wide_reshaped.spectrum_len == base.spectrum_len

    wide_feature = kwiiyatta.feature(wide)
    assert wide_feature.spectrum_len != wide_reshaped.spectrum_len

    # ``kwiiyatta.reshape`` must agree exactly with the per-attribute
    # ``reshaped_*`` accessors on the un-reshaped feature.
    exact_spec_diff = feature.calc_powered_diff(
        wide_reshaped.spectrum_envelope,
        wide_feature.reshaped_spectrum_envelope(base.spectrum_len))
    assert (exact_spec_diff == 0)
    exact_ape_diff = feature.calc_diff(
        wide_reshaped.aperiodicity,
        wide_feature.reshaped_aperiodicity(base.spectrum_len))
    assert (exact_ape_diff == 0)

    _, spec_diff, ape_diff, mcep_diff = \
        feature.calc_feature_diffs(base, wide_reshaped)
    check.round_equal(0.0025, spec_diff)
    check.round_equal(0.00087, ape_diff)
    check.round_equal(0.0012, mcep_diff)

    # Clear the first analyzer's private spectrum/aperiodicity slots before
    # reshaping it to the second analyzer's length.
    base._spectrum_envelope = None
    base._aperiodicity = None

    spec_diff = feature.calc_powered_diff(
        wide.spectrum_envelope,
        base.reshaped_spectrum_envelope(wide_feature.spectrum_len))
    check.round_equal(0.0038, spec_diff)

    ape_diff = feature.calc_diff(
        wide.aperiodicity,
        base.reshaped_aperiodicity(wide_feature.spectrum_len))
    check.round_equal(0.00088, ape_diff)

    # With the spectrum envelope removed after mel-cepstrum extraction, the
    # reshaped envelope is still requested from the feature.
    base_feature = kwiiyatta.feature(base)
    base_feature.extract_mel_cepstrum()
    base_feature.spectrum_envelope = None
    spec_diff = feature.calc_powered_diff(
        wide.spectrum_envelope,
        base_feature.reshaped_spectrum_envelope(wide.spectrum_len))
    check.round_equal(0.090, spec_diff)
def test_analyzer_feature(fs, frame_period):
    """Check that ``kwiiyatta.feature`` of an analyzer mirrors its values.

    The analyzer is created with ``mcep_order=36``; the derived feature must
    be a distinct object that reports the same sampling rate, frame period,
    frame length, spectrum length, mel-cepstrum order and feature data.
    """
    from kwiiyatta.vocoder.world import WorldSynthesizer

    wav_path = dataset.get_wav_path(dataset.CLB_WAV, fs=fs)
    analyzer = kwiiyatta.analyze_wav(
        wav_path, frame_period=frame_period, mcep_order=36)
    assert analyzer.mel_cepstrum_order == 36
    assert (analyzer.spectrum_len
            == WorldSynthesizer.fs_spectrum_len(analyzer.fs))

    # Record the frame length before the feature is created.
    frame_len = analyzer.frame_len

    derived = kwiiyatta.feature(analyzer)
    assert analyzer is not derived
    assert analyzer.fs == derived.fs
    assert analyzer.frame_period == frame_period
    assert analyzer.frame_period == derived.frame_period
    assert analyzer.frame_len == derived.frame_len == frame_len
    assert analyzer.spectrum_len == derived.spectrum_len
    assert analyzer.mel_cepstrum_order == derived.mel_cepstrum_order

    assert feature.calc_diff(analyzer.f0, derived.f0) == 0
    assert feature.calc_powered_diff(
        analyzer.spectrum_envelope, derived.spectrum_envelope) == 0
    assert feature.calc_diff(
        analyzer.aperiodicity, derived.aperiodicity) == 0

    # The private mel-cepstrum data must still be unset at this point; this
    # check has to run before ``derived.mel_cepstrum`` is read below.
    assert derived._mel_cepstrum.data is None
    assert feature.calc_diff(
        analyzer.mel_cepstrum.data, derived.mel_cepstrum.data) == 0
def _cached_analyzer(wavfile, frame_period):
    """Return ``kwiiyatta.analyze_wav`` of *wavfile* at *frame_period*.

    NOTE(review): nothing in this body caches the result; any caching
    presumably comes from a decorator outside this view - confirm.
    """
    analyzer = kwiiyatta.analyze_wav(wavfile, frame_period=frame_period)
    return analyzer
def test_set_Analyzer_param():
    """Check which private slots an analyzer fills, and how features share.

    The first half reads individual properties of a fresh analyzer and
    asserts which of ``_f0``, ``_spectrum_envelope``, ``_mel_cepstrum.data``,
    ``_aperiodicity`` and ``_is_voiced`` are set afterwards.  The second half
    checks object identity between an analyzer and the features derived from
    it, contiguity after slicing, and independence after resetting values.
    """

    def check_slots(target, f0, spectrum, mcep, aperiodicity, voiced):
        # Each flag states whether the matching private slot must be set.
        assert (target._f0 is not None) == f0
        assert (target._spectrum_envelope is not None) == spectrum
        assert (target._mel_cepstrum.data is not None) == mcep
        assert (target._aperiodicity is not None) == aperiodicity
        assert (target._is_voiced is not None) == voiced

    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    check_slots(analyzer, f0=False, spectrum=False, mcep=False,
                aperiodicity=False, voiced=False)

    # Reading the aperiodicity also leaves f0 set.
    _ = analyzer.aperiodicity
    check_slots(analyzer, f0=True, spectrum=False, mcep=False,
                aperiodicity=True, voiced=False)

    # Reading the mel-cepstrum fills the spectrum envelope but does not
    # restore the aperiodicity that was just cleared.
    analyzer._aperiodicity = None
    _ = analyzer.mel_cepstrum
    check_slots(analyzer, f0=True, spectrum=True, mcep=True,
                aperiodicity=False, voiced=False)

    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    _ = analyzer.is_voiced
    check_slots(analyzer, f0=True, spectrum=False, mcep=False,
                aperiodicity=True, voiced=True)

    analyzer = kwiiyatta.analyze_wav(dataset.CLB_WAV)
    _ = analyzer.mel_cepstrum
    feat = kwiiyatta.feature(analyzer)
    assert analyzer is not feat
    assert analyzer.mel_cepstrum_order == feat.mel_cepstrum_order
    # The feature must hold the very same objects as the analyzer.
    assert analyzer._f0 is not None
    assert analyzer._f0 is feat.f0
    assert analyzer._spectrum_envelope is not None
    assert analyzer._spectrum_envelope is feat._spectrum_envelope
    assert analyzer.mel_cepstrum_order == feat.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert analyzer._mel_cepstrum.data is feat._mel_cepstrum.data
    assert analyzer._aperiodicity is not None
    assert analyzer._aperiodicity is feat.aperiodicity
    assert analyzer._is_voiced is None
    assert (analyzer.is_voiced == feat.is_voiced).all()

    # Taking every second frame yields non-contiguous arrays; the shallow
    # copy is kept for the equality check after ``ascontiguousarray()``.
    feat = feat[::2]
    snapshot = copy.copy(feat)
    assert not feat.f0.flags['C_CONTIGUOUS']
    assert not feat.spectrum_envelope.flags['C_CONTIGUOUS']
    assert not feat.aperiodicity.flags['C_CONTIGUOUS']

    feat.ascontiguousarray()
    assert feat.f0.flags['C_CONTIGUOUS']
    assert feat.spectrum_envelope.flags['C_CONTIGUOUS']
    assert feat.aperiodicity.flags['C_CONTIGUOUS']
    assert feat is not snapshot
    assert feat == snapshot

    # A feature created with twice the mel-cepstrum order gets its own data,
    # whose leading coefficients equal the analyzer's.
    feat = kwiiyatta.feature(
        analyzer, mcep_order=analyzer.mel_cepstrum_order * 2)
    assert analyzer is not feat
    assert analyzer.mel_cepstrum_order != feat.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert analyzer._mel_cepstrum.data is not feat.mel_cepstrum.data
    leading = feat.mel_cepstrum.data[:, :analyzer.mel_cepstrum_order + 1]
    assert (analyzer._mel_cepstrum.data == leading).all()

    # Setting the order back gives equal, but still separate, data.
    feat.mel_cepstrum_order = analyzer.mel_cepstrum_order
    assert analyzer is not feat
    assert analyzer.mel_cepstrum_order == feat.mel_cepstrum_order
    assert analyzer._mel_cepstrum.data is not None
    assert analyzer._mel_cepstrum.data is not feat.mel_cepstrum.data
    assert (analyzer._mel_cepstrum.data == feat.mel_cepstrum.data).all()

    # Clearing values on the feature must not clear them on the analyzer.
    feat.f0 = None
    assert analyzer.f0 is not None
    feat.spectrum_envelope = None
    assert analyzer.spectrum_envelope is not None
def collect_features(self, path):
    """Return the mel-cepstrum data of *path*, cut to the trimmed length.

    The wav file is analyzed, ``trim_zeros_frames`` is applied to its
    spectrum envelope, and the mel-cepstrum data is sliced to the first
    ``len(trimmed)`` frames.
    """
    analyzed = kwiiyatta.analyze_wav(path)
    trimmed_spec = trim_zeros_frames(analyzed.spectrum_envelope)
    kept_frames = len(trimmed_spec)
    # NOTE(review): the original author asks whether the frames that get
    # trimmed end up shifted toward the front - unverified, confirm.
    return analyzed.mel_cepstrum.data[:kept_frames]