Esempio n. 1
0
def process(p_in: Path, acoustic_converter: AcousticConverter):
    try:
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(glob.glob(str(dataset_wave_dir / p_in.stem) + '.*')[0])

        # input wave
        w_in = acoustic_converter.load_wave(p_in)
        w_in.wave *= input_scale

        # input feature
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_in_effective, effective = acoustic_converter.separate_effective(
            wave=w_in, feature=f_in, threshold=threshold)

        # convert
        f_out = acoustic_converter.convert_loop(f_in_effective)
        f_out = acoustic_converter.combine_silent(effective=effective,
                                                  feature=f_out)
        f_out = acoustic_converter.decode_spectrogram(f_out)

        # save
        sampling_rate = acoustic_converter.out_sampling_rate
        frame_period = acoustic_converter.config.dataset.acoustic_param.frame_period
        wave = f_out.decode(sampling_rate=sampling_rate,
                            frame_period=frame_period)
        librosa.output.write_wav(y=wave.wave,
                                 path=str(output_dir / (p_in.stem + '.wav')),
                                 sr=wave.sampling_rate)
    except:
        import traceback
        traceback.print_exc()
Esempio n. 2
0
def process(p_in: Path, acoustic_converter: AcousticConverter,
            super_resolution: SuperResolution):
    try:
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(
                glob.glob(str(dataset_input_wave_dir / p_in.stem) + '.*')[0])

        w_in = acoustic_converter.load_wave(p_in)
        w_in.wave *= input_wave_scale
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_in_effective, effective = acoustic_converter.separate_effective(
            wave=w_in, feature=f_in)
        f_low = acoustic_converter.convert_loop(f_in_effective)
        f_low = acoustic_converter.combine_silent(effective=effective,
                                                  feature=f_low)
        if filter_size is not None:
            f_low.f0 = AcousticConverter.filter_f0(f_low.f0,
                                                   filter_size=filter_size)
        f_low = acoustic_converter.decode_spectrogram(f_low)
        s_high = super_resolution.convert_loop(f_low.sp.astype(numpy.float32))

        # target
        paths = glob.glob(str(dataset_target_wave_dir / p_in.stem) + '.*')
        has_true = len(paths) > 0
        if has_true:
            p_true = Path(paths[0])
            w_true = acoustic_converter.load_wave(p_true)
            f_true = acoustic_converter.extract_acoustic_feature(w_true)
            vmin, vmax = numpy.log(f_true.sp).min(), numpy.log(f_true.sp).max()
        else:
            vmin, vmax = None, None

        # save figure
        fig = plt.figure(figsize=[36, 22])

        plt.subplot(4, 1, 1)
        plt.imshow(numpy.log(f_in.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_in.f0, 'w')
        plt.colorbar()
        plt.clim(vmin=vmin, vmax=vmax)

        plt.subplot(4, 1, 2)
        plt.imshow(numpy.log(f_low.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_low.f0, 'w')
        plt.colorbar()
        plt.clim(vmin=vmin, vmax=vmax)

        plt.subplot(4, 1, 3)
        plt.imshow(numpy.log(s_high).T, aspect='auto', origin='reverse')
        plt.colorbar()
        plt.clim(vmin=vmin, vmax=vmax)

        if has_true:
            plt.subplot(4, 1, 4)
            plt.imshow(numpy.log(f_true.sp).T, aspect='auto', origin='reverse')
            plt.plot(f_true.f0, 'w')
            plt.colorbar()
            plt.clim(vmin=vmin, vmax=vmax)

        fig.savefig(output / (p_in.stem + '.png'))

        # save wave
        f_low_sr = BYAcousticFeature(
            f0=f_low.f0,
            spectrogram=f_low.sp,
            aperiodicity=f_low.ap,
            mfcc=f_low.mc,
            voiced=f_low.voiced,
        )

        rate = acoustic_converter.out_sampling_rate
        wave = super_resolution.convert_to_audio(s_high,
                                                 acoustic_feature=f_low_sr,
                                                 sampling_rate=rate)
        librosa.output.write_wav(y=wave.wave,
                                 path=str(output / (p_in.stem + '.wav')),
                                 sr=rate)
    except:
        import traceback
        traceback.print_exc()