Example #1
0
    def get_example(self, i):
        """Build one training example from the i-th aligned feature pair.

        Loads pre-computed alignment indexes, re-indexes the input and
        target acoustic features with them, encodes everything, then
        applies seed-synchronized random padding and cropping so input,
        target and mask stay frame-aligned.  When ``chainer.config.train``
        is true, noise augmentation is applied to input and target.

        :param i: dataset index into ``self.inputs``.
        :return: dict with keys ``input``, ``target`` and ``mask``.
        """
        train = chainer.config.train

        inputs = self.inputs[i]
        p_input, p_target, p_indexes = inputs.in_feature_path, inputs.out_feature_path, inputs.indexes_path

        indexes = AlignIndexes.load(p_indexes)

        # input feature, aligned via indexes1
        # (local renamed from `input` so the builtin is not shadowed)
        f_in = AcousticFeature.load(p_input)
        f_in = f_in.indexing(indexes.indexes1)
        in_feature = encode_feature(f_in, targets=self.config.in_features)

        # target feature, aligned via indexes2
        f_tar = AcousticFeature.load(p_target)
        f_tar = f_tar.indexing(indexes.indexes2)
        target = encode_feature(f_tar, targets=self.config.out_features)

        mask = encode_feature(make_mask(f_tar),
                              targets=self.config.out_features)

        # padding: one shared seed keeps the three arrays padded identically
        seed = numpy.random.randint(2**31)
        in_feature = random_pad(in_feature,
                                seed=seed,
                                min_size=self.config.train_crop_size)
        target = random_pad(target,
                            seed=seed,
                            min_size=self.config.train_crop_size)
        mask = random_pad(mask,
                          seed=seed,
                          min_size=self.config.train_crop_size)

        # crop: again a shared seed so the same window is cut from all three
        seed = numpy.random.randint(2**31)
        in_feature = random_crop(in_feature,
                                 seed=seed,
                                 crop_size=self.config.train_crop_size)
        target = random_crop(target,
                             seed=seed,
                             crop_size=self.config.train_crop_size)
        mask = random_crop(mask,
                           seed=seed,
                           crop_size=self.config.train_crop_size)

        if train:
            # noise augmentation only during training
            in_feature = add_noise(in_feature,
                                   p_global=self.config.input_global_noise,
                                   p_local=self.config.input_local_noise)
            target = add_noise(target,
                               p_global=self.config.target_global_noise,
                               p_local=self.config.target_local_noise)

        return dict(
            input=in_feature,
            target=target,
            mask=mask,
        )
Example #2
0
    def get_example(self, i):
        """Sample a random (x, y) pair of acoustic feature files and return
        the encoded, padded, cropped (and, in train mode, noised) arrays.

        The index ``i`` is not used for selection; source and target files
        are drawn independently at random from ``self.x_paths`` and
        ``self.y_paths``.

        :return: dict with keys ``x``, ``y``, ``mask_x`` and ``mask_y``.
        """
        is_train = chainer.config.train
        crop_size = self.config.train_crop_size

        path_x = self.x_paths[numpy.random.randint(len(self.x_paths))]
        path_y = self.y_paths[numpy.random.randint(len(self.y_paths))]

        feature_x = AcousticFeature.load(path_x)
        x = encode_feature(feature_x, targets=self.config.in_features)

        feature_y = AcousticFeature.load(path_y)
        y = encode_feature(feature_y, targets=self.config.out_features)

        mask_x = encode_feature(make_mask(feature_x),
                                targets=self.config.in_features)
        mask_y = encode_feature(make_mask(feature_y),
                                targets=self.config.out_features)

        # pad each array together with its mask using a shared seed so the
        # (data, mask) pair stays frame-aligned
        seed = numpy.random.randint(2**31)
        x = random_pad(x, seed=seed, min_size=crop_size)
        mask_x = random_pad(mask_x, seed=seed, min_size=crop_size)

        seed = numpy.random.randint(2**31)
        y = random_pad(y, seed=seed, min_size=crop_size)
        mask_y = random_pad(mask_y, seed=seed, min_size=crop_size)

        # crop, again seed-synchronized per (data, mask) pair
        seed = numpy.random.randint(2**31)
        x = random_crop(x, seed=seed, crop_size=crop_size)
        mask_x = random_crop(mask_x, seed=seed, crop_size=crop_size)

        seed = numpy.random.randint(2**31)
        y = random_crop(y, seed=seed, crop_size=crop_size)
        mask_y = random_crop(mask_y, seed=seed, crop_size=crop_size)

        if is_train:
            # noise augmentation only during training
            x = add_noise(x,
                          p_global=self.config.input_global_noise,
                          p_local=self.config.input_local_noise)
            y = add_noise(y,
                          p_global=self.config.target_global_noise,
                          p_local=self.config.target_local_noise)

        return dict(x=x, y=y, mask_x=mask_x, mask_y=mask_y)
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute and save alignment indexes for one pair of feature files.

    Skips the pair when the output ``.npy`` already exists and overwriting
    is disabled.  Reads the module-level ``arguments`` namespace for the
    output directory, dtype and save options.

    :param pair_path: (feature file 1, feature file 2) paths.
    """
    first, second = pair_path
    if first.stem != second.stem:
        print('warning: the file names are different', first, second)

    out = Path(arguments.output, first.stem + '.npy')
    already_done = out.exists() and not arguments.enable_overwrite
    if already_done:
        return

    f1 = AcousticFeature.load(path=first)
    f2 = AcousticFeature.load(path=second)

    indexes = AlignIndexes.extract(f1, f2, dtype=arguments.dtype)

    # save
    indexes.save(path=out, validate=True, ignores=arguments.ignore_feature)
def generate_aligned_wave(
    pair_path: Tuple[Path, Path, Path],
    sampling_rate: int,
    frame_period: float,
    alpha: float,
):
    """Decode an aligned acoustic-feature pair to audio and save one wav.

    Channel 1 holds the aligned feature1 waveform, channel 2 the aligned
    feature2 waveform.  Reads the module-level ``arguments`` namespace for
    the output directory and the overwrite flag.

    :param pair_path: (feature1 path, feature2 path, align-indexes path).
    :param sampling_rate: audio sampling rate in Hz.
    :param frame_period: frame period used for waveform synthesis.
    :param alpha: all-pass constant for mel-cepstrum -> spectrum conversion.
    """
    path_feature1, path_feature2, path_indexes = pair_path

    if path_feature1.stem != path_feature2.stem:
        print('warning: the file names are different', path_feature1,
              path_feature2)

    if path_feature1.stem != path_indexes.stem:
        print('warning: the file names are different', path_feature1,
              path_indexes)

    out = Path(arguments.output, path_indexes.stem + '.wav')
    # BUGFIX: skip only when the output already exists. Previously the
    # function returned unconditionally whenever disable_overwrite was set,
    # so no wave was ever produced (cf. generate_align_indexes, which
    # checks out.exists() before its overwrite flag).
    if out.exists() and arguments.disable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path_feature1)
    feature2 = AcousticFeature.load(path=path_feature2)

    # reconstruct full spectral envelope / aperiodicity from the compressed
    # (mel-cepstrum, coded aperiodicity) representation before decoding
    feature1.sp = AcousticFeature.mc2sp(feature1.mc,
                                        sampling_rate=sampling_rate,
                                        alpha=alpha)
    feature2.sp = AcousticFeature.mc2sp(feature2.mc,
                                        sampling_rate=sampling_rate,
                                        alpha=alpha)
    feature1.ap = AcousticFeature.decode_ap(feature1.coded_ap,
                                            sampling_rate=sampling_rate)
    feature2.ap = AcousticFeature.decode_ap(feature2.coded_ap,
                                            sampling_rate=sampling_rate)

    align_indexes = AlignIndexes.load(path=path_indexes)
    align_indexes.feature1 = feature1
    align_indexes.feature2 = feature2

    wave1 = align_indexes.get_aligned_feature1().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)
    wave2 = align_indexes.get_aligned_feature2().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)

    # save both aligned waveforms as a two-channel array
    y = numpy.vstack([wave1.wave, wave2.wave])
    # NOTE(review): librosa.output.write_wav was removed in librosa 0.8;
    # soundfile.write is the modern replacement — verify the pinned version.
    librosa.output.write_wav(str(out), y, sr=sampling_rate)
def load_f0(path: Path):
    """Return the F0 contour stored in the acoustic feature file at *path*."""
    return AcousticFeature.load(path=path).f0
Example #6
0
 def load_acoustic_feature(self, path: Path):
     """Load and return the acoustic feature stored at *path*."""
     feature = AcousticFeature.load(path)
     return feature