Code example #1
File: dataset.py    Project: sshuster/yukarin
    def get_example(self, i):
        train = chainer.config.train

        inputs = self.inputs[i]
        p_input, p_target, p_indexes = inputs.in_feature_path, inputs.out_feature_path, inputs.indexes_path

        indexes = AlignIndexes.load(p_indexes)

        # input feature
        f_in = AcousticFeature.load(p_input)
        f_in = f_in.indexing(indexes.indexes1)
        input = encode_feature(f_in, targets=self.config.in_features)

        # target feature
        f_tar = AcousticFeature.load(p_target)
        f_tar = f_tar.indexing(indexes.indexes2)
        target = encode_feature(f_tar, targets=self.config.out_features)

        mask = encode_feature(make_mask(f_tar),
                              targets=self.config.out_features)

        # padding (one shared seed so input, target and mask get the same random padding)
        seed = numpy.random.randint(2**31)
        input = random_pad(input,
                           seed=seed,
                           min_size=self.config.train_crop_size)
        target = random_pad(target,
                            seed=seed,
                            min_size=self.config.train_crop_size)
        mask = random_pad(mask,
                          seed=seed,
                          min_size=self.config.train_crop_size)

        # crop to train_crop_size (again one shared seed so all three arrays are cropped at the same position)
        seed = numpy.random.randint(2**31)
        input = random_crop(input,
                            seed=seed,
                            crop_size=self.config.train_crop_size)
        target = random_crop(target,
                             seed=seed,
                             crop_size=self.config.train_crop_size)
        mask = random_crop(mask,
                           seed=seed,
                           crop_size=self.config.train_crop_size)

        if train:
            input = add_noise(input,
                              p_global=self.config.input_global_noise,
                              p_local=self.config.input_local_noise)
            target = add_noise(target,
                               p_global=self.config.target_global_noise,
                               p_local=self.config.target_local_noise)

        return dict(
            input=input,
            target=target,
            mask=mask,
        )
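
A minimal usage sketch, assuming this get_example sits on a chainer.dataset.DatasetMixin subclass (called Dataset here) and that config and inputs are built elsewhere; the returned dicts can then be batched with Chainer's standard helpers:

import chainer

dataset = Dataset(config=config, inputs=inputs)  # hypothetical constructor
iterator = chainer.iterators.SerialIterator(dataset, batch_size=8, shuffle=True)
batch = chainer.dataset.concat_examples(iterator.next())
# batch['input'], batch['target'] and batch['mask'] are stacked per key;
# within one example they come from the same padded/cropped frame range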
Code example #2
def generate_align_indexes(pair_path: Tuple[Path, Path]):
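    # note: arguments, sconf1/sconf2, feat1/feat2, mcepgmm and pconf are not
    # defined in this function; in the original script they are module-level
    # objects (apparently parsed CLI arguments, speaker/pair configs, feature
    # extractors and a trained mcep GMM) prepared before this function is called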
    path1, path2 = pair_path
    if path1.stem != path2.stem:
        print('warning: the file names are different', path1, path2)

    out = Path(arguments.output, path1.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # source (original) speaker: extract mceps and pass them through the GMM converter before alignment
    wave = Wave.load(path=path1, sampling_rate=sconf1.wav_fs)
    wave = wave.pad(pre_second=arguments.pad_second1,
                    post_second=arguments.pad_second1)
    x = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)

    feat1.analyze(x)
    mcep = feat1.mcep(dim=sconf1.mcep_dim, alpha=sconf1.mcep_alpha)

    if arguments.threshold_db1 is not None:
        indexes = wave.get_effective_frame(
            threshold_db=arguments.threshold_db1,
            fft_length=sconf1.wav_fftl,
            frame_period=sconf1.wav_shiftms,
        )
        mcep = mcep[indexes]

    cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
    mcep1 = numpy.c_[mcep[:, 0], cvmcep_wopow]

    # target speaker: extract mceps directly
    wave = Wave.load(path=path2, sampling_rate=sconf2.wav_fs)
    wave = wave.pad(pre_second=arguments.pad_second2,
                    post_second=arguments.pad_second2)
    x = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)

    feat2.analyze(x)
    mcep2 = feat2.mcep(dim=sconf2.mcep_dim, alpha=sconf2.mcep_alpha)

    if arguments.threshold_db2 is not None:
        indexes = wave.get_effective_frame(
            threshold_db=arguments.threshold_db2,
            fft_length=sconf2.wav_fftl,
            frame_period=sconf2.wav_shiftms,
        )
        mcep2 = mcep2[indexes]

    # align the two mcep sequences and save the resulting frame indexes
    feature1 = AcousticFeature(mc=mcep1)
    feature2 = AcousticFeature(mc=mcep2)
    align_indexes = AlignIndexes.extract(feature1,
                                         feature2,
                                         dtype=arguments.dtype)
    align_indexes.save(path=out,
                       validate=True,
                       ignores=arguments.ignore_feature)
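
A minimal driver sketch (an assumption, not part of the original script): pair up same-named files from two directories and map generate_align_indexes over the pairs with a process pool. The input-directory argument names are hypothetical.

from pathlib import Path
import multiprocessing

paths1 = sorted(Path(arguments.input1_directory).glob('*.wav'))  # hypothetical argument
paths2 = sorted(Path(arguments.input2_directory).glob('*.wav'))  # hypothetical argument
assert len(paths1) == len(paths2)

# with the fork start method (the default on Linux) the module-level globals
# used above (feat1, mcepgmm, ...) are inherited by the worker processes
with multiprocessing.Pool() as pool:
    pool.map(generate_align_indexes, list(zip(paths1, paths2)))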
Code example #3
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    path1, path2 = pair_path
    if path1.stem != path2.stem:
        print('warning: the file names are different', path1, path2)

    out = Path(arguments.output, path1.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path1)
    feature2 = AcousticFeature.load(path=path2)

    align_indexes = AlignIndexes.extract(feature1, feature2, dtype=arguments.dtype)

    # save
    align_indexes.save(path=out, validate=True, ignores=arguments.ignore_feature)
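
Compared with code example #2, this variant starts from feature files that have already been extracted, so it skips the wave loading, low-cut filtering, mcep extraction and GMM conversion and aligns the two saved AcousticFeature objects directly.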
Code example #4
def generate_aligned_wave(
    pair_path: Tuple[Path, Path, Path],
    sampling_rate: int,
    frame_period: float,
    alpha: float,
):
    path_feature1, path_feature2, path_indexes = pair_path

    if path_feature1.stem != path_feature2.stem:
        print('warning: the file names are different', path_feature1,
              path_feature2)

    if path_feature1.stem != path_indexes.stem:
        print('warning: the file names are different', path_feature1,
              path_indexes)

    out = Path(arguments.output, path_indexes.stem + '.wav')
    if arguments.disable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path_feature1)
    feature2 = AcousticFeature.load(path=path_feature2)
    feature1.sp = AcousticFeature.mc2sp(feature1.mc,
                                        sampling_rate=sampling_rate,
                                        alpha=alpha)
    feature2.sp = AcousticFeature.mc2sp(feature2.mc,
                                        sampling_rate=sampling_rate,
                                        alpha=alpha)
    feature1.ap = AcousticFeature.decode_ap(feature1.coded_ap,
                                            sampling_rate=sampling_rate)
    feature2.ap = AcousticFeature.decode_ap(feature2.coded_ap,
                                            sampling_rate=sampling_rate)

    align_indexes = AlignIndexes.load(path=path_indexes)
    align_indexes.feature1 = feature1
    align_indexes.feature2 = feature2

    wave1 = align_indexes.get_aligned_feature1().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)
    wave2 = align_indexes.get_aligned_feature2().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)

    # save: stack the two aligned waves as a 2-channel array so they can be
    # compared by ear; note that librosa.output.write_wav was removed in
    # librosa 0.8, so this call needs an older librosa (or soundfile.write with y.T)
    y = numpy.vstack([wave1.wave, wave2.wave])
    librosa.output.write_wav(str(out), y, sr=sampling_rate)
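
A minimal driver sketch (an assumption, not part of the original script): since generate_aligned_wave also takes scalar parameters, a driver would typically bind them with functools.partial before mapping over the path triples. The parameter values below are placeholders, not values from the original project.

from functools import partial
import multiprocessing

path_triples = []  # assumed: list of (feature1_path, feature2_path, indexes_path) tuples built elsewhere

process = partial(
    generate_aligned_wave,
    sampling_rate=24000,  # placeholder values
    frame_period=5.0,
    alpha=0.466,
)

with multiprocessing.Pool() as pool:
    pool.map(process, path_triples)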