def process(p_in: Path, acoustic_converter: AcousticConverter):
    """Convert one input wave file and write the converted result as a .wav.

    If ``p_in`` is a feature file (.npy/.npz), the matching wave is looked up
    in the module-level ``dataset_wave_dir`` by stem.  Errors are reported but
    never propagated, so a batch run continues past a bad file.

    :param p_in: path to an input wave (or .npy/.npz feature file whose stem
        names the wave).
    :param acoustic_converter: converter providing load/extract/convert/decode.
    """
    try:
        # Feature files stand in for waves: resolve back to the real wave
        # by stem.  Raises IndexError (caught below) if no match exists.
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(glob.glob(str(dataset_wave_dir / p_in.stem) + '.*')[0])

        # input wave -- input_scale is a module-level gain factor
        w_in = acoustic_converter.load_wave(p_in)
        w_in.wave *= input_scale

        # input feature; split effective (voiced/active) frames from silence
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_in_effective, effective = acoustic_converter.separate_effective(
            wave=w_in,
            feature=f_in,
            threshold=threshold,
        )

        # convert only the effective frames, then re-insert the silent ones
        f_out = acoustic_converter.convert_loop(f_in_effective)
        f_out = acoustic_converter.combine_silent(effective=effective, feature=f_out)
        f_out = acoustic_converter.decode_spectrogram(f_out)

        # synthesize and save
        sampling_rate = acoustic_converter.out_sampling_rate
        frame_period = acoustic_converter.config.dataset.acoustic_param.frame_period
        wave = f_out.decode(sampling_rate=sampling_rate, frame_period=frame_period)
        librosa.output.write_wav(
            y=wave.wave,
            path=str(output_dir / (p_in.stem + '.wav')),
            sr=wave.sampling_rate,
        )
    except Exception:
        # Best-effort batch processing: report and keep going.  A bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit.
        import traceback
        traceback.print_exc()
def _convert(p_in: Path, acoustic_converter: AcousticConverter, super_resolution: SuperResolution):
    """Run the full low-resolution conversion + super-resolution pipeline
    on a single wave file and return the synthesized wave."""
    # Extract features and isolate the effective (non-silent) frames.
    wave_in = acoustic_converter.load_wave(p_in)
    feature_in = acoustic_converter.extract_acoustic_feature(wave_in)
    feature_effective, effective_mask = acoustic_converter.separate_effective(
        wave=wave_in,
        feature=feature_in,
    )

    # Low-resolution conversion, silence re-insertion, spectrogram decode.
    low = acoustic_converter.convert(feature_effective)
    low = acoustic_converter.combine_silent(effective=effective_mask, feature=low)
    low = acoustic_converter.decode_spectrogram(low)

    # Super-resolve the spectrogram, then synthesize using the
    # low-resolution feature as the acoustic scaffold.
    sp_high = super_resolution.convert(low.sp.astype(numpy.float32))
    scaffold = BYAcousticFeature(
        f0=low.f0,
        spectrogram=low.sp,
        aperiodicity=low.ap,
        mfcc=low.mc,
        voiced=low.voiced,
    )
    return super_resolution(
        sp_high,
        acoustic_feature=scaffold,
        sampling_rate=acoustic_converter.out_sampling_rate,
    )
def process(p_in: Path, acoustic_converter: AcousticConverter, super_resolution: SuperResolution):
    """Convert one input wave, save a diagnostic figure and the output .wav.

    The figure stacks four spectrograms: input, low-resolution conversion,
    super-resolved, and the ground-truth target looked up by stem in
    ``dataset_target_wave_dir``.  Errors are reported and swallowed so a
    batch run continues.

    :param p_in: path to an input wave (or .npy/.npz feature file whose stem
        names the wave in ``dataset_input_wave_dir``).
    :param acoustic_converter: low-resolution feature converter.
    :param super_resolution: spectrogram super-resolution model / vocoder.
    """
    try:
        # Feature files stand in for waves: resolve back to the real wave.
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(
                glob.glob(str(dataset_input_wave_dir / p_in.stem) + '.*')[0])

        w_in = acoustic_converter.load_wave(p_in)
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_low = acoustic_converter.convert(f_in)
        s_high = super_resolution.convert(f_low.sp.astype(numpy.float32))

        # target (ground truth) -- raises IndexError (caught below) if absent
        p_true = Path(
            glob.glob(str(dataset_target_wave_dir / p_in.stem) + '.*')[0])
        w_true = acoustic_converter.load_wave(p_true)
        f_true = acoustic_converter.extract_acoustic_feature(w_true)

        # save figure: input / low-res / super-resolved / target spectrograms
        fig = plt.figure(figsize=[18, 8])

        plt.subplot(4, 1, 1)
        plt.imshow(numpy.log(f_in.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_in.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 2)
        plt.imshow(numpy.log(f_low.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_low.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 3)
        plt.imshow(numpy.log(s_high).T, aspect='auto', origin='reverse')
        plt.colorbar()

        plt.subplot(4, 1, 4)
        plt.imshow(numpy.log(f_true.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_true.f0, 'w')
        plt.colorbar()

        fig.savefig(output / (p_in.stem + '.png'))

        # save wave: synthesize from the super-resolved spectrogram, using
        # the low-resolution feature as the acoustic scaffold
        f_low_sr = BYAcousticFeature(
            f0=f_low.f0,
            spectrogram=f_low.sp,
            aperiodicity=f_low.ap,
            mfcc=f_low.mc,
            voiced=f_low.voiced,
        )
        rate = acoustic_converter.out_sampling_rate
        wave = super_resolution(s_high, acoustic_feature=f_low_sr, sampling_rate=rate)
        librosa.output.write_wav(
            y=wave.wave,
            path=str(output / (p_in.stem + '.wav')),
            sr=rate,
        )
    except Exception:
        # BUG FIX: the original called traceback.format_exc() and discarded
        # its return value, so the traceback was never shown.  Also narrowed
        # from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
        import traceback
        print('error!', str(p_in))
        traceback.print_exc()
def process(p_in: Path, acoustic_converter: AcousticConverter, super_resolution: SuperResolution):
    """Convert one input wave, save a diagnostic figure and the output .wav.

    Like the other ``process`` variants, but runs silence separation, an
    optional f0 smoothing filter (``filter_size``), and tolerates a missing
    ground-truth target (the fourth subplot is simply omitted).

    :param p_in: path to an input wave (or .npy/.npz feature file whose stem
        names the wave in ``dataset_input_wave_dir``).
    :param acoustic_converter: low-resolution feature converter.
    :param super_resolution: spectrogram super-resolution model / vocoder.
    """
    try:
        # Feature files stand in for waves: resolve back to the real wave.
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(
                glob.glob(str(dataset_input_wave_dir / p_in.stem) + '.*')[0])

        w_in = acoustic_converter.load_wave(p_in)
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_in_effective, effective = acoustic_converter.separate_effective(
            wave=w_in, feature=f_in)
        f_low = acoustic_converter.convert(f_in_effective)
        f_low = acoustic_converter.combine_silent(effective=effective, feature=f_low)
        if filter_size is not None:
            # optional f0 smoothing (module-level filter_size)
            f_low.f0 = AcousticConverter.filter_f0(f_low.f0, filter_size=filter_size)
        f_low = acoustic_converter.decode_spectrogram(f_low)
        s_high = super_resolution.convert(f_low.sp.astype(numpy.float32))

        # target (ground truth) is optional here
        paths = glob.glob(str(dataset_target_wave_dir / p_in.stem) + '.*')
        has_true = len(paths) > 0
        if has_true:
            p_true = Path(paths[0])
            w_true = acoustic_converter.load_wave(p_true)
            f_true = acoustic_converter.extract_acoustic_feature(w_true)

        # save figure: input / low-res / super-resolved (/ target) spectrograms
        fig = plt.figure(figsize=[36, 22])

        plt.subplot(4, 1, 1)
        plt.imshow(numpy.log(f_in.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_in.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 2)
        plt.imshow(numpy.log(f_low.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_low.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 3)
        plt.imshow(numpy.log(s_high).T, aspect='auto', origin='reverse')
        plt.colorbar()

        if has_true:
            plt.subplot(4, 1, 4)
            plt.imshow(numpy.log(f_true.sp).T, aspect='auto', origin='reverse')
            plt.plot(f_true.f0, 'w')
            plt.colorbar()

        fig.savefig(output / (p_in.stem + '.png'))

        # save wave: synthesize from the super-resolved spectrogram, using
        # the low-resolution feature as the acoustic scaffold
        f_low_sr = BYAcousticFeature(
            f0=f_low.f0,
            spectrogram=f_low.sp,
            aperiodicity=f_low.ap,
            mfcc=f_low.mc,
            voiced=f_low.voiced,
        )
        rate = acoustic_converter.out_sampling_rate
        wave = super_resolution(s_high, acoustic_feature=f_low_sr, sampling_rate=rate)
        librosa.output.write_wav(
            y=wave.wave,
            path=str(output / (p_in.stem + '.wav')),
            sr=rate,
        )
    except Exception:
        # BUG FIX: the original was `except: pass`, which silently swallowed
        # every failure (including KeyboardInterrupt).  Report the failing
        # file and traceback, matching the sibling process() functions, and
        # let interrupts propagate.
        import traceback
        print('error!', str(p_in))
        traceback.print_exc()