def generate_file(path):
    out = Path(arguments.output_directory, path.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave = wave_file_load_process(path, test=True)

    # make acoustic feature
    acoustic_feature_process = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
    )
    feature = acoustic_feature_process(wave, test=True).astype_only_float(numpy.float32)

    # "high" is the full-resolution spectral envelope; "low" is reconstructed
    # from the low-order mel-cepstrum
    high_spectrogram = feature.spectrogram

    fftlen = pyworld.get_cheaptrick_fft_size(arguments.sample_rate)
    low_spectrogram = pysptk.mc2sp(
        feature.mfcc,
        alpha=arguments.alpha,
        fftlen=fftlen,
    )

    # save
    numpy.save(out.absolute(), {
        'low': low_spectrogram,
        'high': high_spectrogram,
    })

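
# A minimal, standalone sketch of the low/high spectrogram pair built above, using
# only pyworld and pysptk (the project's feature classes are not needed for the
# illustration). The default order/alpha values here are illustrative placeholders,
# not the project's configuration.
import numpy
import pyworld
import pysptk


def low_high_spectrogram_sketch(wave: numpy.ndarray, sample_rate: int,
                                order: int = 8, alpha: float = 0.466):
    wave = numpy.ascontiguousarray(wave, dtype=numpy.float64)
    # WORLD analysis: f0 track and the "high" (full-resolution) spectral envelope
    f0, t = pyworld.dio(wave, sample_rate)
    high = pyworld.cheaptrick(wave, f0, t, sample_rate)
    # compress to a low-order mel-cepstrum, then expand back to a smooth "low" envelope
    mc = pysptk.sp2mc(high, order=order, alpha=alpha)
    fftlen = pyworld.get_cheaptrick_fft_size(sample_rate)
    low = pysptk.mc2sp(mc, alpha=alpha, fftlen=fftlen)
    return low, high
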
def process(p: Path, super_resolution: SuperResolution):
    param = config.dataset.param
    wave_process = WaveFileLoadProcess(
        sample_rate=param.voice_param.sample_rate,
        top_db=None,
    )
    acoustic_feature_process = AcousticFeatureProcess(
        frame_period=param.acoustic_feature_param.frame_period,
        order=param.acoustic_feature_param.order,
        alpha=param.acoustic_feature_param.alpha,
        f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
    )

    try:
        # if a feature file was passed, look up the corresponding wave file
        if p.suffix in ['.npy', '.npz']:
            p = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
            p = Path(p)

        input_feature = acoustic_feature_process(wave_process(str(p)))
        wave = super_resolution(
            input_feature.spectrogram,
            acoustic_feature=input_feature,
            sampling_rate=param.voice_param.sample_rate,
        )
        librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True)
    except Exception:
        import traceback
        print('error!', str(p))
        traceback.print_exc()

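
# A hypothetical driver for process() above: map it over every .wav in an input
# directory. The directory layout and the SuperResolution instance are assumptions
# for illustration; process() itself writes '<output>/<stem>.wav' and logs failures
# instead of raising.
from pathlib import Path


def run_all(input_dir: Path, super_resolution: 'SuperResolution'):
    paths = sorted(input_dir.glob('*.wav'))
    # a multiprocessing.Pool with functools.partial(process, super_resolution=...)
    # would also work, provided the model and config pickle cleanly; a sequential
    # loop avoids that concern
    for p in paths:
        process(p, super_resolution=super_resolution)
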
def __init__(self, config: Config, model_path: Path, gpu: int = None) -> None:
    self.config = config
    self.model_path = model_path
    self.gpu = gpu

    # build the predictor and load the trained weights
    self.model = model = create_predictor(config.model)
    chainer.serializers.load_npz(str(model_path), model)
    if self.gpu is not None:
        model.to_gpu(self.gpu)

    self._param = param = config.dataset.param
    self._wave_process = WaveFileLoadProcess(
        sample_rate=param.voice_param.sample_rate,
        top_db=None,
    )
    self._feature_process = AcousticFeatureProcess(
        frame_period=param.acoustic_feature_param.frame_period,
        order=param.acoustic_feature_param.order,
        alpha=param.acoustic_feature_param.alpha,
        f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
    )

    # normalization statistics for the input and target features
    self._acoustic_feature_load_process = acoustic_feature_load_process = AcousticFeatureLoadProcess()
    input_mean = acoustic_feature_load_process(config.dataset.input_mean_path, test=True)
    input_var = acoustic_feature_load_process(config.dataset.input_var_path, test=True)
    target_mean = acoustic_feature_load_process(config.dataset.target_mean_path, test=True)
    target_var = acoustic_feature_load_process(config.dataset.target_var_path, test=True)

    self._feature_normalize = AcousticFeatureNormalizeProcess(
        mean=input_mean,
        var=input_var,
    )
    self._feature_denormalize = AcousticFeatureDenormalizeProcess(
        mean=target_mean,
        var=target_var,
    )

    feature_sizes = AcousticFeature.get_sizes(
        sampling_rate=param.voice_param.sample_rate,
        order=param.acoustic_feature_param.order,
    )
    self._encode_feature = EncodeFeatureProcess(config.dataset.features)
    self._decode_feature = DecodeFeatureProcess(config.dataset.features, feature_sizes)

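
# A hypothetical sketch of how the mean/variance statistics consumed by the
# normalize/denormalize processes above could be produced from a directory of saved
# feature arrays. The file layout (plain 2-D arrays of shape (frames, dims)) and the
# output names are assumptions; the project's own dataset scripts may differ.
from pathlib import Path
import numpy


def save_feature_statistics(feature_dir: Path, out_mean: Path, out_var: Path):
    arrays = [numpy.load(str(p)) for p in sorted(feature_dir.glob('*.npy'))]
    stacked = numpy.concatenate(arrays, axis=0)  # (total_frames, dims)
    numpy.save(str(out_mean), stacked.mean(axis=0))
    numpy.save(str(out_var), stacked.var(axis=0))
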
def __init__(
        self,
        acoustic_feature_param: AcousticFeatureParam,
        out_sampling_rate: int,
):
    self.acoustic_feature_param = acoustic_feature_param
    self.out_sampling_rate = out_sampling_rate
    self._encoder = AcousticFeatureProcess(
        frame_period=acoustic_feature_param.frame_period,
        order=acoustic_feature_param.order,
        alpha=acoustic_feature_param.alpha,
        f0_estimating_method=acoustic_feature_param.f0_estimating_method,
    )

def generate_feature(path1, path2):
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = wave_file_load_process(path1, test=True)
    wave2 = wave_file_load_process(path2, test=True)

    # make acoustic feature
    acoustic_feature_process1 = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
        f0_floor=arguments.f0_floor1,
        f0_ceil=arguments.f0_ceil1,
    )
    acoustic_feature_process2 = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
        f0_floor=arguments.f0_floor2,
        f0_ceil=arguments.f0_ceil2,
    )
    f1 = acoustic_feature_process1(wave1, test=True).astype_only_float(numpy.float32)
    f2 = acoustic_feature_process2(wave2, test=True).astype_only_float(numpy.float32)

    # pre convert
    if pre_convert:
        f1_ref = pre_converter1.convert_to_feature(f1)
    else:
        f1_ref = f1

    # alignment
    if not arguments.disable_alignment:
        aligner = MFCCAligner(f1_ref.mfcc, f2.mfcc)

        f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
        spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
        aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
        mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
        voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)

        f1 = AcousticFeature(
            f0=f0_1,
            spectrogram=spectrogram_1,
            aperiodicity=aperiodicity_1,
            mfcc=mfcc_1,
            voiced=voiced_1,
        )
        f2 = AcousticFeature(
            f0=f0_2,
            spectrogram=spectrogram_2,
            aperiodicity=aperiodicity_2,
            mfcc=mfcc_2,
            voiced=voiced_2,
        )
        f1.validate()
        f2.validate()

    # save
    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True, ignore=arguments.ignore_feature)
    acoustic_feature_save_process({'path': out1, 'feature': f1})
    print('saved!', out1)
    acoustic_feature_save_process({'path': out2, 'feature': f2})
    print('saved!', out2)
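

# MFCCAligner is assumed to run dynamic time warping on the two MFCC sequences and
# then reuse the resulting frame-to-frame path for every other feature stream. This
# is a minimal, self-contained sketch of that idea, not the project's implementation.
import numpy
from scipy.spatial.distance import cdist


def dtw_path(x: numpy.ndarray, y: numpy.ndarray):
    """Return index arrays (ix, iy) aligning frames of x (Tx, D) with y (Ty, D)."""
    cost = cdist(x, y)  # (Tx, Ty) frame-wise distances
    Tx, Ty = cost.shape

    # accumulate the cheapest cost to reach each cell
    acc = numpy.full((Tx, Ty), numpy.inf)
    acc[0, 0] = cost[0, 0]
    for i in range(Tx):
        for j in range(Ty):
            if i == 0 and j == 0:
                continue
            best = min(
                acc[i - 1, j] if i > 0 else numpy.inf,
                acc[i, j - 1] if j > 0 else numpy.inf,
                acc[i - 1, j - 1] if i > 0 and j > 0 else numpy.inf,
            )
            acc[i, j] = cost[i, j] + best

    # backtrack from the last cell to recover the warping path
    i, j = Tx - 1, Ty - 1
    path = [(i, j)]
    while i > 0 or j > 0:
        candidates = []
        if i > 0 and j > 0:
            candidates.append((acc[i - 1, j - 1], i - 1, j - 1))
        if i > 0:
            candidates.append((acc[i - 1, j], i - 1, j))
        if j > 0:
            candidates.append((acc[i, j - 1], i, j - 1))
        _, i, j = min(candidates)
        path.append((i, j))
    path.reverse()

    ix = numpy.array([p[0] for p in path])
    iy = numpy.array([p[1] for p in path])
    return ix, iy


# usage sketch: align on MFCCs once, then index every feature with the same path
# ix, iy = dtw_path(f1.mfcc, f2.mfcc)
# f0_1, f0_2 = f1.f0[ix], f2.f0[iy]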