def get_example(self, i):
    """Build the ``input`` / ``target`` / ``mask`` arrays for example ``i``.

    The input and target acoustic features are loaded from the paths stored
    in ``self.inputs[i]``, re-indexed with the saved alignment indexes and
    encoded with ``encode_feature``. The three resulting arrays are then
    padded and cropped to ``self.config.train_crop_size``; one seed is shared
    by the three pad calls and another by the three crop calls (presumably so
    that all three arrays are padded/cropped at the same positions).

    Noise is added to the input and the target only when
    ``chainer.config.train`` is truthy; the mask never receives noise.

    Args:
        i: Index into ``self.inputs``.

    Returns:
        dict: ``{'input': ..., 'target': ..., 'mask': ...}``.
    """
    is_training = chainer.config.train
    cfg = self.config

    entry = self.inputs[i]
    in_path = entry.in_feature_path
    out_path = entry.out_feature_path
    indexes_path = entry.indexes_path

    alignment = AlignIndexes.load(indexes_path)

    # input side: select frames with the first index sequence, then encode
    in_feature = AcousticFeature.load(in_path).indexing(alignment.indexes1)
    encoded_input = encode_feature(in_feature, targets=cfg.in_features)

    # target side: select frames with the second index sequence, then encode
    out_feature = AcousticFeature.load(out_path).indexing(alignment.indexes2)
    encoded_target = encode_feature(out_feature, targets=cfg.out_features)
    encoded_mask = encode_feature(make_mask(out_feature), targets=cfg.out_features)

    # padding: one shared seed for the three arrays
    pad_seed = numpy.random.randint(2**31)
    encoded_input, encoded_target, encoded_mask = (
        random_pad(array, seed=pad_seed, min_size=cfg.train_crop_size)
        for array in (encoded_input, encoded_target, encoded_mask)
    )

    # crop: a fresh seed, again shared by the three arrays
    crop_seed = numpy.random.randint(2**31)
    encoded_input, encoded_target, encoded_mask = (
        random_crop(array, seed=crop_seed, crop_size=cfg.train_crop_size)
        for array in (encoded_input, encoded_target, encoded_mask)
    )

    if is_training:
        encoded_input = add_noise(
            encoded_input,
            p_global=cfg.input_global_noise,
            p_local=cfg.input_local_noise,
        )
        encoded_target = add_noise(
            encoded_target,
            p_global=cfg.target_global_noise,
            p_local=cfg.target_local_noise,
        )

    return {
        'input': encoded_input,
        'target': encoded_target,
        'mask': encoded_mask,
    }
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute and save alignment indexes for one pair of wave files.

    Both waves are loaded, padded, low-cut filtered and analysed into
    mel-cepstra. For the first wave, every mel-cepstrum column except
    column 0 is additionally converted with ``mcepgmm`` and re-joined with
    column 0 before alignment. The result is written to
    ``<arguments.output>/<stem of the first path>.npy``.

    Nothing is done when that file already exists and
    ``arguments.enable_overwrite`` is falsy.

    Args:
        pair_path: ``(path1, path2)`` wave files to align. A warning is
            printed when their stems differ.
    """
    source_path, target_path = pair_path
    if source_path.stem != target_path.stem:
        print('warning: the file names are different', source_path, target_path)

    out = Path(arguments.output, source_path.stem + '.npy')
    already_done = out.exists()
    if already_done and not arguments.enable_overwrite:
        return

    def _analyze_mcep(path, analyzer, speaker_conf, pad_second, threshold_db):
        """Return the mel-cepstrum of ``path``, restricted to the frames
        chosen by ``wave.get_effective_frame`` when a threshold is given."""
        wave = Wave.load(path=path, sampling_rate=speaker_conf.wav_fs)
        wave = wave.pad(pre_second=pad_second, post_second=pad_second)
        filtered = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)
        analyzer.analyze(filtered)
        mcep = analyzer.mcep(dim=speaker_conf.mcep_dim, alpha=speaker_conf.mcep_alpha)
        if threshold_db is None:
            return mcep
        keep = wave.get_effective_frame(
            threshold_db=threshold_db,
            fft_length=speaker_conf.wav_fftl,
            frame_period=speaker_conf.wav_shiftms,
        )
        return mcep[keep]

    # original: analyse, then GMM-convert everything but column 0
    source_mcep = _analyze_mcep(
        source_path, feat1, sconf1, arguments.pad_second1, arguments.threshold_db1)
    converted = mcepgmm.convert(
        static_delta(source_mcep[:, 1:]),
        cvtype=pconf.GMM_mcep_cvtype,
    )
    source_mcep = numpy.c_[source_mcep[:, 0], converted]

    # target: analysed only, no conversion
    target_mcep = _analyze_mcep(
        target_path, feat2, sconf2, arguments.pad_second2, arguments.threshold_db2)

    # align
    alignment = AlignIndexes.extract(
        AcousticFeature(mc=source_mcep),
        AcousticFeature(mc=target_mcep),
        dtype=arguments.dtype,
    )
    alignment.save(path=out, validate=True, ignores=arguments.ignore_feature)
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute and save alignment indexes for one pair of feature files.

    The two saved acoustic features are loaded, aligned with
    ``AlignIndexes.extract`` and the result is written to
    ``<arguments.output>/<stem of the first path>.npy``.

    Nothing is done when that file already exists and
    ``arguments.enable_overwrite`` is falsy.

    Args:
        pair_path: ``(path1, path2)`` feature files to align. A warning is
            printed when their stems differ.
    """
    first_path, second_path = pair_path
    if first_path.stem != second_path.stem:
        print('warning: the file names are different', first_path, second_path)

    out = Path(arguments.output, first_path.stem + '.npy')
    already_done = out.exists()
    if already_done and not arguments.enable_overwrite:
        return

    alignment = AlignIndexes.extract(
        AcousticFeature.load(path=first_path),
        AcousticFeature.load(path=second_path),
        dtype=arguments.dtype,
    )

    # save
    alignment.save(path=out, validate=True, ignores=arguments.ignore_feature)
def generate_aligned_wave(
        pair_path: Tuple[Path, Path, Path],
        sampling_rate: int,
        frame_period: float,
        alpha: float,
):
    """Decode two aligned features to audio and save them as one wave file.

    The two acoustic features and their alignment indexes are loaded, the
    spectrum (``sp``) and aperiodicity (``ap``) of each feature are rebuilt
    from ``mc`` and ``coded_ap``, both features are aligned and decoded, and
    the two waveforms are stacked with ``numpy.vstack`` and written to
    ``<arguments.output>/<stem of the indexes path>.wav``.

    When ``arguments.disable_overwrite`` is set, an output file that already
    exists is left untouched and the function returns early.

    Args:
        pair_path: ``(feature1 path, feature2 path, indexes path)``. A
            warning is printed when the stems do not match.
        sampling_rate: Passed to ``mc2sp``, ``decode_ap``, ``decode`` and
            used as the sample rate of the written file.
        frame_period: Passed to ``decode``.
        alpha: Passed to ``mc2sp``.
    """
    path_feature1, path_feature2, path_indexes = pair_path
    if path_feature1.stem != path_feature2.stem:
        print('warning: the file names are different', path_feature1, path_feature2)
    if path_feature1.stem != path_indexes.stem:
        print('warning: the file names are different', path_feature1, path_indexes)

    out = Path(arguments.output, path_indexes.stem + '.wav')
    # Skip only when there is actually something to overwrite. Checking the
    # flag alone would make the function a no-op whenever it is set.
    if out.exists() and arguments.disable_overwrite:
        return

    feature1 = AcousticFeature.load(path=path_feature1)
    feature2 = AcousticFeature.load(path=path_feature2)

    # rebuild the fields needed for decoding from their stored encodings
    feature1.sp = AcousticFeature.mc2sp(feature1.mc, sampling_rate=sampling_rate, alpha=alpha)
    feature2.sp = AcousticFeature.mc2sp(feature2.mc, sampling_rate=sampling_rate, alpha=alpha)
    feature1.ap = AcousticFeature.decode_ap(feature1.coded_ap, sampling_rate=sampling_rate)
    feature2.ap = AcousticFeature.decode_ap(feature2.coded_ap, sampling_rate=sampling_rate)

    align_indexes = AlignIndexes.load(path=path_indexes)
    align_indexes.feature1 = feature1
    align_indexes.feature2 = feature2

    wave1 = align_indexes.get_aligned_feature1().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)
    wave2 = align_indexes.get_aligned_feature2().decode(
        sampling_rate=sampling_rate, frame_period=frame_period)

    # save: one row per decoded waveform
    y = numpy.vstack([wave1.wave, wave2.wave])
    # NOTE(review): librosa.output was removed in librosa 0.8; this call
    # needs an older librosa -- confirm the pinned version.
    librosa.output.write_wav(str(out), y, sr=sampling_rate)