def generate_file(path):
    out = Path(arguments.output_directory, path.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave = wave_file_load_process(path, test=True)

    # make acoustic feature
    acoustic_feature_process = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
    )
    feature = acoustic_feature_process(wave, test=True).astype_only_float(numpy.float32)
    high_spectrogram = feature.spectrogram

    fftlen = pyworld.get_cheaptrick_fft_size(arguments.sample_rate)
    low_spectrogram = pysptk.mc2sp(
        feature.mfcc,
        alpha=arguments.alpha,
        fftlen=fftlen,
    )

    # save
    numpy.save(out.absolute(), {
        'low': low_spectrogram,
        'high': high_spectrogram,
    })
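
# Usage sketch (an assumption, not part of the original source): `generate_file`
# only reads the module-level `arguments`, so it can be mapped over a directory
# of wav files. `arguments.input_directory` is an assumed field name.
if __name__ == '__main__':
    import multiprocessing
    paths = sorted(Path(arguments.input_directory).glob('*.wav'))
    Path(arguments.output_directory).mkdir(parents=True, exist_ok=True)
    with multiprocessing.Pool() as pool:
        pool.map(generate_file, paths)
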
Example #2
def process(p: Path, super_resolution: SuperResolution):
    param = config.dataset.param
    wave_process = WaveFileLoadProcess(
        sample_rate=param.voice_param.sample_rate,
        top_db=None,
    )
    acoustic_feature_process = AcousticFeatureProcess(
        frame_period=param.acoustic_feature_param.frame_period,
        order=param.acoustic_feature_param.order,
        alpha=param.acoustic_feature_param.alpha,
        f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
    )

    try:
        if p.suffix in ['.npy', '.npz']:
            # given a saved feature file, look up the matching source wav by stem
            p = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
            p = Path(p)
        input = acoustic_feature_process(wave_process(str(p)))
        wave = super_resolution(input.spectrogram,
                                acoustic_feature=input,
                                sampling_rate=param.voice_param.sample_rate)
        librosa.output.write_wav(str(output / p.stem) + '.wav',
                                 wave.wave,
                                 wave.sampling_rate,
                                 norm=True)
    except Exception:
        import traceback
        print('error!', str(p))
        print(traceback.format_exc())
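
# Usage sketch (an assumption, not part of the original source): `process` reads
# the module-level globals `config`, `input_wave_directory` and `output`, and the
# SuperResolution constructor shown in the next snippet takes (config, model_path,
# gpu). The model filename used here is hypothetical.
if __name__ == '__main__':
    super_resolution = SuperResolution(config, model_path=Path('predictor.npz'), gpu=None)
    output.mkdir(parents=True, exist_ok=True)
    for p in sorted(input_wave_directory.glob('*.wav')):
        process(p, super_resolution)
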
    def __init__(self,
                 config: Config,
                 model_path: Path,
                 gpu: int = None) -> None:
        self.config = config
        self.model_path = model_path
        self.gpu = gpu

        # build the predictor network and load the trained weights
        self.model = model = create_predictor(config.model)
        chainer.serializers.load_npz(str(model_path), model)
        if self.gpu is not None:
            model.to_gpu(self.gpu)

        self._param = param = config.dataset.param
        self._wave_process = WaveFileLoadProcess(
            sample_rate=param.voice_param.sample_rate,
            top_db=None,
        )
        self._feature_process = AcousticFeatureProcess(
            frame_period=param.acoustic_feature_param.frame_period,
            order=param.acoustic_feature_param.order,
            alpha=param.acoustic_feature_param.alpha,
            f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
        )

        self._acoustic_feature_load_process = acoustic_feature_load_process = AcousticFeatureLoadProcess()

        # load mean/variance statistics for input normalization and output denormalization
        input_mean = acoustic_feature_load_process(
            config.dataset.input_mean_path, test=True)
        input_var = acoustic_feature_load_process(
            config.dataset.input_var_path, test=True)
        target_mean = acoustic_feature_load_process(
            config.dataset.target_mean_path, test=True)
        target_var = acoustic_feature_load_process(
            config.dataset.target_var_path, test=True)
        self._feature_normalize = AcousticFeatureNormalizeProcess(
            mean=input_mean,
            var=input_var,
        )
        self._feature_denormalize = AcousticFeatureDenormalizeProcess(
            mean=target_mean,
            var=target_var,
        )

        feature_sizes = AcousticFeature.get_sizes(
            sampling_rate=param.voice_param.sample_rate,
            order=param.acoustic_feature_param.order,
        )
        self._encode_feature = EncodeFeatureProcess(config.dataset.features)
        self._decode_feature = DecodeFeatureProcess(config.dataset.features,
                                                    feature_sizes)
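
    # The snippet ends with __init__; presumably an inference method (not shown
    # here) chains the components built above: wave load -> acoustic feature ->
    # normalize -> encode -> self.model forward pass -> decode -> denormalize ->
    # waveform synthesis. The exact ordering is an assumption.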
Example #4
    def __init__(
        self,
        acoustic_feature_param: AcousticFeatureParam,
        out_sampling_rate: int,
    ):
        self.acoustic_feature_param = acoustic_feature_param
        self.out_sampling_rate = out_sampling_rate
        self._encoder = AcousticFeatureProcess(
            frame_period=acoustic_feature_param.frame_period,
            order=acoustic_feature_param.order,
            alpha=acoustic_feature_param.alpha,
            f0_estimating_method=acoustic_feature_param.f0_estimating_method,
        )

def generate_feature(path1, path2):
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = wave_file_load_process(path1, test=True)
    wave2 = wave_file_load_process(path2, test=True)

    # make acoustic feature
    acoustic_feature_process1 = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
        f0_floor=arguments.f0_floor1,
        f0_ceil=arguments.f0_ceil1,
    )
    acoustic_feature_process2 = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
        f0_floor=arguments.f0_floor2,
        f0_ceil=arguments.f0_ceil2,
    )
    f1 = acoustic_feature_process1(wave1,
                                   test=True).astype_only_float(numpy.float32)
    f2 = acoustic_feature_process2(wave2,
                                   test=True).astype_only_float(numpy.float32)

    # pre convert (pre_convert and pre_converter1 are module-level globals defined elsewhere)
    if pre_convert:
        f1_ref = pre_converter1.convert_to_feature(f1)
    else:
        f1_ref = f1

    # alignment
    if not arguments.disable_alignment:
        aligner = MFCCAligner(f1_ref.mfcc, f2.mfcc)

        f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
        spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram,
                                                     f2.spectrogram)
        aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity,
                                                       f2.aperiodicity)
        mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
        voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)

        f1 = AcousticFeature(
            f0=f0_1,
            spectrogram=spectrogram_1,
            aperiodicity=aperiodicity_1,
            mfcc=mfcc_1,
            voiced=voiced_1,
        )
        f2 = AcousticFeature(
            f0=f0_2,
            spectrogram=spectrogram_2,
            aperiodicity=aperiodicity_2,
            mfcc=mfcc_2,
            voiced=voiced_2,
        )

        f1.validate()
        f2.validate()

    # save
    acoustic_feature_save_process = AcousticFeatureSaveProcess(
        validate=True, ignore=arguments.ignore_feature)
    acoustic_feature_save_process({'path': out1, 'feature': f1})
    print('saved!', out1)

    acoustic_feature_save_process({'path': out2, 'feature': f2})
    print('saved!', out2)
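
# Usage sketch (an assumption, not part of the original source): the driver
# presumably pairs wav files from two speaker directories by sorted order;
# `input1_directory` / `input2_directory` are assumed argument names.
if __name__ == '__main__':
    paths1 = sorted(Path(arguments.input1_directory).glob('*.wav'))
    paths2 = sorted(Path(arguments.input2_directory).glob('*.wav'))
    assert len(paths1) == len(paths2)
    for path1, path2 in zip(paths1, paths2):
        generate_feature(path1, path2)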