def diff2wav(
    x, diffmcep, rmcep, wavf=None, fs=22050, fftl=1024, shiftms=10, alpha=0.455
):
    """Synthesize a waveform with ``Synthesizer.synthesis_diff``.

    The synthesized signal is clipped to [-1.0, 1.0].  When ``wavf`` is
    given the result is written there with ``sf.write`` and nothing is
    returned; otherwise the clipped waveform is returned.

    Args:
        x: Input passed through to ``synthesis_diff`` (presumably the source
            waveform -- confirm against the Synthesizer API).
        diffmcep: Passed to ``synthesis_diff`` as its second argument.
        rmcep: Forwarded as the ``rmcep`` keyword.
        wavf: Output path, or ``None`` to return the waveform instead.
        fs: Sampling rate handed to the synthesizer and to ``sf.write``.
        fftl: FFT length handed to the synthesizer.
        shiftms: Frame shift handed to the synthesizer.
        alpha: Forwarded as the ``alpha`` keyword.

    Returns:
        The clipped waveform when ``wavf`` is ``None``; otherwise ``None``.
    """
    engine = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    clipped = np.clip(
        engine.synthesis_diff(x, diffmcep, rmcep=rmcep, alpha=alpha),
        -1.0,
        1.0,
    )

    # Guard clause: no destination means the caller wants the samples back.
    if wavf is None:
        return clipped

    sf.write(wavf, clipped, fs)
    return None
def world2wav(
    f0, mcep, codeap, wavf=None, fs=22050, fftl=1024, shiftms=10, alpha=0.455
):
    """Synthesize a waveform with ``Synthesizer.synthesis``.

    The synthesized signal is clipped to [-1.0, 1.0].  When ``wavf`` is
    given the result is written there with ``sf.write`` and nothing is
    returned; otherwise the clipped waveform is returned.

    Args:
        f0: First positional argument of ``Synthesizer.synthesis``.
        mcep: Second positional argument of ``Synthesizer.synthesis``.
        codeap: Third positional argument of ``Synthesizer.synthesis``.
        wavf: Output path, or ``None`` to return the waveform instead.
        fs: Sampling rate handed to the synthesizer and to ``sf.write``.
        fftl: FFT length handed to the synthesizer.
        shiftms: Frame shift handed to the synthesizer.
        alpha: Forwarded as the ``alpha`` keyword.

    Returns:
        The clipped waveform when ``wavf`` is ``None``; otherwise ``None``.
    """
    engine = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    clipped = np.clip(
        engine.synthesis(f0, mcep, codeap, alpha=alpha),
        -1.0,
        1.0,
    )

    # Guard clause: no destination means the caller wants the samples back.
    if wavf is None:
        return clipped

    sf.write(wavf, clipped, fs)
    return None
def diff2wav(
    x, diffmcep, rmcep, wavf=None, fs=22050, fftl=1024, shiftms=10, alpha=0.455
):
    """Synthesize a 16-bit waveform with ``Synthesizer.synthesis_diff``.

    The synthesized signal is clipped to the int16 range and converted to
    ``np.int16``.  When ``wavf`` is given the result is written there with
    ``wavfile.write`` and nothing is returned; otherwise the int16 waveform
    is returned.

    Args:
        x: Input passed through to ``synthesis_diff`` (presumably the source
            waveform -- confirm against the Synthesizer API).
        diffmcep: Passed to ``synthesis_diff`` as its second argument.
        rmcep: Forwarded as the ``rmcep`` keyword.
        wavf: Output path, or ``None`` to return the waveform instead.
        fs: Sampling rate handed to the synthesizer and to ``wavfile.write``.
        fftl: FFT length handed to the synthesizer.
        shiftms: Frame shift handed to the synthesizer.
        alpha: Forwarded as the ``alpha`` keyword.

    Returns:
        The int16 waveform when ``wavf`` is ``None``; otherwise ``None``.
    """
    synthesizer = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    wav = synthesizer.synthesis_diff(x, diffmcep, rmcep=rmcep, alpha=alpha)

    # Clip BEFORE the integer conversion.  Casting first lets out-of-range
    # samples overflow in the cast, after which the clip can no longer
    # saturate them.
    wav = np.clip(wav, -32768, 32767).astype(np.int16)

    if wavf is None:
        return wav

    wavfile.write(wavf, fs, wav)
    return None
def world_speech_synthesis(queue, wav_list, config):
    """WORLD speech synthesis

    For every entry of ``wav_list`` the matching feature file is located with
    ``path_replace``, the acoustic features are loaded from it, a waveform is
    synthesized and written as a 16-bit wav file.  ``'Finish'`` is put on
    ``queue`` once all files are processed.

    Args:
        queue (multiprocessing.Queue): the queue to store the file name of utterance
        wav_list (list): list of the wav files
        config (dict): feature extraction config

    Note:
        The process exits with status 1 (``sys.exit(1)``) when a feature file
        has no ``/world`` dataset.
    """
    # define synthesizer
    synthesizer = Synthesizer(fs=config['sampling_rate'],
                              fftl=config['fft_size'],
                              shiftms=config['shiftms'])

    # synthesis
    n_files = len(wav_list)
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", wav_name, i + 1, n_files)

        # load acoustic features
        feat_name = path_replace(wav_name, config['indir'], config['outdir'],
                                 extname=config['feature_format'])
        if check_hdf5(feat_name, "/world"):
            h = read_hdf5(feat_name, "/world")
        else:
            logging.error("%s is not existed.", feat_name)
            sys.exit(1)

        if check_hdf5(feat_name, "/f0"):
            f0 = read_hdf5(feat_name, "/f0")
        else:
            # No stored F0: take it from the /world matrix and zero the
            # frames whose uv column equals 0.0.
            uv = h[:, config['uv_dim_idx']].copy(order='C')
            f0 = h[:, config['f0_dim_idx']].copy(order='C')  # cont_f0_lpf
            fz_idx = np.where(uv == 0.0)
            f0[fz_idx] = 0.0

        if check_hdf5(feat_name, "/ap"):
            ap = read_hdf5(feat_name, "/ap")
        else:
            # No stored aperiodicity: decode it from the coded columns.
            codeap = h[:, config['ap_dim_start']:config['ap_dim_end']].copy(
                order='C')
            ap = pyworld.decode_aperiodicity(codeap,
                                             config['sampling_rate'],
                                             config['fft_size'])

        mcep = h[:, config['mcep_dim_start']:config['mcep_dim_end']].copy(
            order='C')

        # waveform synthesis
        wav = synthesizer.synthesis(f0, mcep, ap, alpha=config['mcep_alpha'])
        # Clip BEFORE the int16 conversion; casting first lets out-of-range
        # samples overflow, which the clip cannot undo afterwards.
        wav = np.clip(wav, -32768, 32767).astype(np.int16)

        # save restored wav
        restored_name = path_replace(wav_name, "wav", "world", extname="wav")
        wavfile.write(restored_name, config['sampling_rate'], wav)

    queue.put('Finish')
def test_anasyn_16000(self):
    """Analyze ``test16000.wav`` and resynthesize it from F0, mcep and ap."""
    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)

    # analysis
    extractor = FeatureExtractor(analyzer='world', fs=fs, fftl=1024, shiftms=5)
    f0, spc, ap = extractor.analyze(x)
    mcep = extractor.mcep(dim=24, alpha=0.42)

    # F0 must contain at least one nonzero entry
    nonzero_f0 = np.nonzero(f0)[0]
    assert nonzero_f0.size > 0
    assert spc.shape == ap.shape

    # synthesize F0, mcep, ap
    resynthesizer = Synthesizer(fs=fs, fftl=1024, shiftms=5)
    resynthesized = resynthesizer.synthesis(f0, mcep, ap, alpha=0.42)
    nun_check(resynthesized)
def test_anasyn_44100(self):
    """Analyze ``test44100.wav`` and resynthesize it from F0, mcep and ap."""
    wav_path = dirpath + '/data/test44100.wav'
    fs, x = wavfile.read(wav_path)

    # analysis
    extractor = FeatureExtractor(
        analyzer='world', fs=fs, fftl=2048, shiftms=5, minf0=100)
    f0, spc, ap = extractor.analyze(x)
    mcep = extractor.mcep(dim=40, alpha=0.50)

    # F0 must contain at least one nonzero entry
    nonzero_f0 = np.nonzero(f0)[0]
    assert nonzero_f0.size > 0
    assert spc.shape == ap.shape

    # mcep synthesis
    resynthesizer = Synthesizer(fs=fs, fftl=2048, shiftms=5)
    resynthesized = resynthesizer.synthesis(f0, mcep, ap, alpha=0.50)
    nun_check(resynthesized)
def test_synthesis_from_codeap(self):
    """Resynthesize ``test16000.wav`` from a coded-then-decoded aperiodicity."""
    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)

    # analysis
    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=5)
    f0, spc, ap = extractor.analyze(x)
    codeap = extractor.codeap()

    # F0 must contain at least one nonzero entry
    nonzero_f0 = np.nonzero(f0)[0]
    assert nonzero_f0.size > 0
    assert spc.shape == ap.shape

    # the last axis of the coded aperiodicity must match what pyworld reports
    expected_bands = pyworld.get_num_aperiodicities(fs)
    assert expected_bands == codeap.shape[-1]

    # replace the analyzed ap with the one decoded from its coded form
    decoded_ap = pyworld.decode_aperiodicity(codeap, fs, 1024)

    resynthesizer = Synthesizer(fs=fs, fftl=1024, shiftms=5)
    resynthesized = resynthesizer.synthesis_spc(f0, spc, decoded_ap)
    nun_check(resynthesized)
def _synthesize_world_features(self, flbl):
    """Run analysis/synthesis from the stored features and save the result.

    The waveform is synthesized from ``self.feats["f0"]``,
    ``self.feats["mcep"]`` and ``self.feats["ap"]``, clipped to [-1.0, 1.0],
    stored in ``self.feats["x_anasyn"]`` and written to
    ``<h5_dir>/<flbl>_anasyn.wav`` with ``sf.write``.

    Args:
        flbl: File label used as the prefix of the output wav name.
    """
    # construct Synthesizer class
    synthesizer = Synthesizer(fs=self.conf["fs"],
                              fftl=self.conf["fftl"],
                              shiftms=self.conf["shiftms"])

    # analysis/synthesis using F0, mcep, and ap
    anasyn = synthesizer.synthesis(
        self.feats["f0"],
        self.feats["mcep"],
        self.feats["ap"],
        alpha=self.conf["mcep_alpha"],
    )

    # Keep the stored feature and the file on disk identical: both get the
    # clipped signal (previously the unclipped array was written).
    anasyn = np.clip(anasyn, -1.0, 1.0)
    self.feats["x_anasyn"] = anasyn

    anasynf = self.h5_dir / (flbl + "_anasyn.wav")
    sf.write(str(anasynf), anasyn, self.conf["fs"])
def _synthesize(self, x, flbl):
    """Run analysis/synthesis from the stored features and save the result.

    The waveform is synthesized from ``self.feats["f0"]``,
    ``self.feats["mcep"]`` and ``self.feats["ap"]``, clipped to the int16
    range, stored in ``self.feats["x_anasyn"]`` and written as int16 to
    ``<h5_dir>/<flbl>_anasyn.wav`` with ``wavfile.write``.

    Args:
        x: Not used by this method.
        flbl: File label used as the prefix of the output wav name.
    """
    conf = self.conf
    feats = self.feats

    # construct Synthesizer class
    engine = Synthesizer(fs=conf["fs"], fftl=conf["fftl"], shiftms=conf["shiftms"])

    # analysis/synthesis using F0, mcep, and ap
    resynthesized = engine.synthesis(
        feats["f0"], feats["mcep"], feats["ap"], alpha=conf["mcep_alpha"]
    )
    feats["x_anasyn"] = np.clip(resynthesized, -32768, 32767)

    out_path = self.h5_dir / (flbl + "_anasyn.wav")
    samples = np.array(feats["x_anasyn"], dtype=np.int16)
    wavfile.write(out_path, conf["fs"], samples)
def main(*argv):
    """Extract acoustic features for one speaker and save analysis/synthesis wavs.

    For every entry of the list file whose ``.h5`` file does not exist yet
    (or always, with ``--overwrite``), the wav file is read, low-cut
    filtered and analyzed; f0, mcep, npow and codeap are saved to the HDF5
    file, and a waveform resynthesized from f0, mcep and ap is written to
    the ``anasyn`` directory as int16.

    Args:
        *argv: Command-line arguments (without the program name).  When
            empty, ``sys.argv[1:]`` is used.
    """
    # sys.argv[0] is the program name; argparse must only see the arguments,
    # otherwise the script path is consumed as the first positional.
    argv = argv if argv else sys.argv[1:]

    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    for out_dir in (os.path.join(h5_dir, args.speaker),
                    os.path.join(anasyn_dir, args.speaker)):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join(h5_dir, f + '.h5')

            if os.path.exists(h5f) and not args.overwrite:
                print("Acoustic features already exist: " + h5f)
                continue

            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; it was an alias of the builtin
            # float, i.e. float64.
            x = np.array(x, dtype=np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            print("Extract acoustic features: " + wavf)

            # analyze F0, spc, and ap
            f0, _, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            npow = feat.npow()
            codeap = feat.codeap()

            # save features into a hdf5 file (spc and ap are not stored)
            h5 = HDF5(h5f, mode='w')
            try:
                h5.save(f0, ext='f0')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
            finally:
                # release the file handle even when a save fails
                h5.close()

            # analysis/synthesis using F0, mcep, and ap
            wav = synthesizer.synthesis(f0, mcep, ap, alpha=sconf.mcep_alpha)
            # saturate to the int16 range before the integer conversion
            wav = np.clip(wav, -32768, 32767)
            anasynf = os.path.join(anasyn_dir, f + '.wav')
            wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
def main(*argv):
    """Convert the evaluation utterances of the original speaker.

    Loads the mcep GMM, the F0 statistics of both speakers and the GV
    statistics, then for every entry of the evaluation list analyzes the
    wav file, converts F0 and mcep, synthesizes the converted waveform and
    writes it as int16 into ``<pair_dir>/test``.

    Only ``gmmmode`` ``None`` (output ``*_VC.wav``) and ``'diff'`` (output
    ``*_DIFFVC.wav``) have a synthesis path here.

    Args:
        *argv: Command-line arguments (without the program name).  When
            empty, ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If an utterance is processed with a ``gmmmode`` that has
            no synthesis path.
    """
    argv = argv if argv else sys.argv[1:]

    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode', '--gmmmode', type=str, default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str,
                        help='Original speaker')
    parser.add_argument('tar', type=str,
                        help='Target speaker')
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file', type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir', type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; it was an alias of the builtin
            # float, i.e. float64.
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, _, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (0th coefficient is carried over unconverted)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            elif args.gmmmode == 'diff':
                # synthesis DIFFVC w/ GV
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')
            else:
                # Previously this fell through to an unbound ``wav``.
                raise ValueError(
                    "no synthesis path for gmmmode: {}".format(args.gmmmode))

            # write waveform
            # NOTE(review): this creates a directory named after the
            # utterance, although the wav is written next to it -- confirm
            # whether the directory is actually needed.
            os.makedirs(os.path.join(test_dir, f), exist_ok=True)
            # saturate to the int16 range before the integer conversion
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
def main(*argv):
    """Extract acoustic features for one speaker and save analysis/synthesis wavs.

    For every entry of the list file whose ``.h5`` file does not exist yet
    (or always, with ``--overwrite``), the wav file is read, low-cut
    filtered and analyzed; f0, mcep, npow and codeap are saved to the HDF5
    file, and a waveform resynthesized from f0, mcep and ap is written to
    the ``anasyn`` directory as int16.

    Args:
        *argv: Command-line arguments (without the program name).  When
            empty, ``sys.argv[1:]`` is used.
    """
    # sys.argv[0] is the program name; argparse must only see the arguments,
    # otherwise the script path is consumed as the first positional.
    argv = argv if argv else sys.argv[1:]

    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    for out_dir in (os.path.join(h5_dir, args.speaker),
                    os.path.join(anasyn_dir, args.speaker)):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join(h5_dir, f + '.h5')

            if os.path.exists(h5f) and not args.overwrite:
                print("Acoustic features already exist: " + h5f)
                continue

            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; it was an alias of the builtin
            # float, i.e. float64.
            x = np.array(x, dtype=np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            print("Extract acoustic features: " + wavf)

            # analyze F0, spc, and ap
            f0, _, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            npow = feat.npow()
            codeap = feat.codeap()

            # save features into a hdf5 file (spc and ap are not stored)
            h5 = HDF5(h5f, mode='w')
            try:
                h5.save(f0, ext='f0')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
            finally:
                # release the file handle even when a save fails
                h5.close()

            # analysis/synthesis using F0, mcep, and ap
            wav = synthesizer.synthesis(f0, mcep, ap, alpha=sconf.mcep_alpha)
            # saturate to the int16 range before the integer conversion
            wav = np.clip(wav, -32768, 32767)
            anasynf = os.path.join(anasyn_dir, f + '.wav')
            wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
def main():
    """Extract acoustic features for the 'converted' or 'target' file set.

    Reads the module-level ``args`` (``args.file``, ``args.minf0``,
    ``args.maxf0``).  For every entry of ``./list/<set>.list`` whose ``.h5``
    file does not exist yet, the wav file is read, low-cut filtered and
    analyzed; f0, mcep, npow and codeap are saved to the HDF5 file, and a
    waveform resynthesized from f0, mcep and ap is written to the set's
    ``anasyn`` directory as int16.

    Raises:
        ValueError: If ``args.file`` is neither ``'con'`` nor ``'tar'``.
    """
    # ``dataset`` instead of ``file`` to avoid shadowing a builtin name
    if args.file == 'con':
        dataset = 'converted'
    elif args.file == 'tar':
        dataset = 'target'
    else:
        raise ValueError("The file is incorrect")

    # the same alpha is used for mcep extraction and for synthesis
    mcep_alpha = 0.544

    feat = FeatureExtractor(analyzer='world',
                            fs=22050,
                            fftl=1024,
                            shiftms=5,
                            minf0=args.minf0,
                            maxf0=args.maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=22050, fftl=1024, shiftms=5)

    # open list file
    with open('./list/' + dataset + '.list', 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join('./' + dataset + '/h5f/', f + '.h5')

            if os.path.exists(h5f):
                print("Acoustic features already exist: " + h5f)
                continue

            wavf = os.path.join('./' + dataset + '/wav/', f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; it was an alias of the builtin
            # float, i.e. float64.
            x = np.array(x, dtype=np.float64)
            x = low_cut_filter(x, fs, cutoff=70)

            print("Extract acoustic features: " + wavf)

            # analyze F0, spc, and ap
            f0, _, ap = feat.analyze(x)
            mcep = feat.mcep(dim=34, alpha=mcep_alpha)
            npow = feat.npow()
            codeap = feat.codeap()

            # save features into a hdf5 file (spc and ap are not stored)
            h5 = HDF5(h5f, mode='w')
            try:
                h5.save(f0, ext='f0')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
            finally:
                # release the file handle even when a save fails
                h5.close()

            # analysis/synthesis using F0, mcep, and ap
            wav = synthesizer.synthesis(f0, mcep, ap, alpha=mcep_alpha)
            # saturate to the int16 range before the integer conversion
            wav = np.clip(wav, -32768, 32767)
            anasynf = os.path.join('./' + dataset + '/anasyn/', f + '.wav')
            wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))