def get_example(self, i):
    train = chainer.config.train

    inputs = self.inputs[i]
    p_input, p_target, p_indexes = inputs.in_feature_path, inputs.out_feature_path, inputs.indexes_path
    indexes = AlignIndexes.load(p_indexes)

    # input feature
    f_in = AcousticFeature.load(p_input)
    f_in = f_in.indexing(indexes.indexes1)
    input = encode_feature(f_in, targets=self.config.in_features)

    # target feature
    f_tar = AcousticFeature.load(p_target)
    f_tar = f_tar.indexing(indexes.indexes2)
    target = encode_feature(f_tar, targets=self.config.out_features)

    mask = encode_feature(make_mask(f_tar), targets=self.config.out_features)

    # padding
    seed = numpy.random.randint(2 ** 31)
    input = random_pad(input, seed=seed, min_size=self.config.train_crop_size)
    target = random_pad(target, seed=seed, min_size=self.config.train_crop_size)
    mask = random_pad(mask, seed=seed, min_size=self.config.train_crop_size)

    # crop
    seed = numpy.random.randint(2 ** 31)
    input = random_crop(input, seed=seed, crop_size=self.config.train_crop_size)
    target = random_crop(target, seed=seed, crop_size=self.config.train_crop_size)
    mask = random_crop(mask, seed=seed, crop_size=self.config.train_crop_size)

    if train:
        input = add_noise(input, p_global=self.config.input_global_noise, p_local=self.config.input_local_noise)
        target = add_noise(target, p_global=self.config.target_global_noise, p_local=self.config.target_local_noise)

    return dict(
        input=input,
        target=target,
        mask=mask,
    )
def get_example(self, i):
    train = chainer.config.train

    # unpaired sampling: the index `i` is ignored and one example is drawn
    # at random from each of the two feature sets
    p_x = self.x_paths[numpy.random.randint(len(self.x_paths))]
    p_y = self.y_paths[numpy.random.randint(len(self.y_paths))]

    f_x = AcousticFeature.load(p_x)
    x = encode_feature(f_x, targets=self.config.in_features)

    f_y = AcousticFeature.load(p_y)
    y = encode_feature(f_y, targets=self.config.out_features)

    mask_x = encode_feature(make_mask(f_x), targets=self.config.in_features)
    mask_y = encode_feature(make_mask(f_y), targets=self.config.out_features)

    # padding
    seed = numpy.random.randint(2 ** 31)
    x = random_pad(x, seed=seed, min_size=self.config.train_crop_size)
    mask_x = random_pad(mask_x, seed=seed, min_size=self.config.train_crop_size)

    seed = numpy.random.randint(2 ** 31)
    y = random_pad(y, seed=seed, min_size=self.config.train_crop_size)
    mask_y = random_pad(mask_y, seed=seed, min_size=self.config.train_crop_size)

    # crop
    seed = numpy.random.randint(2 ** 31)
    x = random_crop(x, seed=seed, crop_size=self.config.train_crop_size)
    mask_x = random_crop(mask_x, seed=seed, crop_size=self.config.train_crop_size)

    seed = numpy.random.randint(2 ** 31)
    y = random_crop(y, seed=seed, crop_size=self.config.train_crop_size)
    mask_y = random_crop(mask_y, seed=seed, crop_size=self.config.train_crop_size)

    if train:
        x = add_noise(x, p_global=self.config.input_global_noise, p_local=self.config.input_local_noise)
        y = add_noise(y, p_global=self.config.target_global_noise, p_local=self.config.target_local_noise)

    return dict(
        x=x,
        y=y,
        mask_x=mask_x,
        mask_y=mask_y,
    )
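# Usage sketch (not part of the original code): assuming `dataset` is a
# chainer.dataset.DatasetMixin subclass whose get_example returns the dicts
# above, the examples can be batched with standard Chainer utilities.
# SerialIterator and concat_examples are real Chainer APIs; the `dataset`
# argument and the batch size are illustrative assumptions.
def _example_batching(dataset, batch_size=8):
    import chainer
    from chainer.dataset import concat_examples

    iterator = chainer.iterators.SerialIterator(dataset, batch_size=batch_size, shuffle=True)
    batch = iterator.next()          # list of per-example dicts
    return concat_examples(batch)    # dict of arrays stacked along the batch axis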
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    path1, path2 = pair_path
    if path1.stem != path2.stem:
        print('warning: the file names are different', path1, path2)

    out = Path(arguments.output, path1.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path1)
    feature2 = AcousticFeature.load(path=path2)
    align_indexes = AlignIndexes.extract(feature1, feature2, dtype=arguments.dtype)

    # save
    align_indexes.save(path=out, validate=True, ignores=arguments.ignore_feature)
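# Hedged driver sketch (illustration only, not the project's actual script):
# one way to run generate_align_indexes over two directories of paired
# features. The attribute names `arguments.input1` / `arguments.input2`,
# the '*.npy' glob pattern, and the use of multiprocessing are assumptions.
def _generate_all_align_indexes():
    import multiprocessing

    paths1 = sorted(Path(arguments.input1).glob('*.npy'))
    paths2 = sorted(Path(arguments.input2).glob('*.npy'))
    assert len(paths1) == len(paths2), 'the two feature directories must pair up one-to-one'

    with multiprocessing.Pool() as pool:
        pool.map(generate_align_indexes, list(zip(paths1, paths2)))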
def generate_aligned_wave(
        pair_path: Tuple[Path, Path, Path],
        sampling_rate: int,
        frame_period: float,
        alpha: float,
):
    path_feature1, path_feature2, path_indexes = pair_path

    if path_feature1.stem != path_feature2.stem:
        print('warning: the file names are different', path_feature1, path_feature2)
    if path_feature1.stem != path_indexes.stem:
        print('warning: the file names are different', path_feature1, path_indexes)

    out = Path(arguments.output, path_indexes.stem + '.wav')
    if out.exists() and arguments.disable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path_feature1)
    feature2 = AcousticFeature.load(path=path_feature2)

    feature1.sp = AcousticFeature.mc2sp(feature1.mc, sampling_rate=sampling_rate, alpha=alpha)
    feature2.sp = AcousticFeature.mc2sp(feature2.mc, sampling_rate=sampling_rate, alpha=alpha)
    feature1.ap = AcousticFeature.decode_ap(feature1.coded_ap, sampling_rate=sampling_rate)
    feature2.ap = AcousticFeature.decode_ap(feature2.coded_ap, sampling_rate=sampling_rate)

    align_indexes = AlignIndexes.load(path=path_indexes)
    align_indexes.feature1 = feature1
    align_indexes.feature2 = feature2

    wave1 = align_indexes.get_aligned_feature1().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)
    wave2 = align_indexes.get_aligned_feature2().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)

    # save the two aligned signals as one stereo file (channel 1: feature1, channel 2: feature2)
    y = numpy.vstack([wave1.wave, wave2.wave])
    librosa.output.write_wav(str(out), y, sr=sampling_rate)
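# Hedged driver sketch: fixing the synthesis parameters with functools.partial
# and mapping generate_aligned_wave over matched (feature1, feature2, indexes)
# triples. The directory attributes on `arguments` and the parameter values
# below are illustrative assumptions, not the project's actual CLI.
def _generate_all_aligned_waves():
    import multiprocessing
    from functools import partial

    features1 = sorted(Path(arguments.input_feature1).glob('*.npy'))
    features2 = sorted(Path(arguments.input_feature2).glob('*.npy'))
    indexes = sorted(Path(arguments.input_indexes).glob('*.npy'))

    process = partial(
        generate_aligned_wave,
        sampling_rate=24000,  # assumed value for illustration
        frame_period=5.0,     # assumed value for illustration
        alpha=0.466,          # assumed all-pass constant for illustration
    )
    with multiprocessing.Pool() as pool:
        pool.map(process, list(zip(features1, features2, indexes)))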
def load_f0(path: Path):
    feature = AcousticFeature.load(path=path)
    return feature.f0
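# Hedged sketch built on load_f0 above (not part of the original code): F0
# conversion commonly normalizes with per-speaker log-F0 statistics, so a
# helper like this is often paired with such a loader. Unvoiced frames
# (f0 == 0) are excluded before taking the logarithm.
def _compute_log_f0_statistics(paths):
    f0 = numpy.concatenate([numpy.ravel(load_f0(p)) for p in paths])
    voiced = f0[f0 > 0]
    log_f0 = numpy.log(voiced)
    return log_f0.mean(), log_f0.var()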
def load_acoustic_feature(self, path: Path):
    return AcousticFeature.load(path)