def test_BlockDiagonalGMM(self):
    """Train a GMM with block-diagonal covariances and convert with it.

    A 32-mixture model is trained on a random (1000, 20) joint matrix, then
    a random (200, 5) sequence is expanded with `static_delta` and converted
    with both `mlpg` and `mmse`. The shape of *each* converted output must
    equal the shape of the static input.
    """
    jnt = np.random.rand(1000, 20)
    gmm_tr = GMMTrainer(n_mix=32, n_iter=10, covtype='block_diag')
    gmm_tr.train(jnt)

    data = np.random.rand(200, 5)
    sddata = static_delta(data)
    # NOTE(review): the trainer uses covtype='block_diag' while the convertor
    # is built with covtype='full' -- presumably intentional; confirm.
    gmm_cv = GMMConvertor(n_mix=32, covtype='full', gmmmode=None)
    gmm_cv.open_from_param(gmm_tr.param)

    # Check every conversion type on its own; reusing a single `odata`
    # variable would let the later result hide the earlier one.
    for cvtype in ('mlpg', 'mmse'):
        odata = gmm_cv.convert(sddata, cvtype=cvtype)
        assert data.shape == odata.shape, cvtype
def test_GMM_train_and_convert(self):
    """Train a full-covariance GMM and check the shape of each conversion.

    A 4-mixture model is trained on a random (100, 20) joint matrix. A
    random (200, 5) sequence is concatenated with `delta(data)` and
    converted with both `mlpg` and `mmse`; each output must have the same
    shape as the static input.
    """
    jnt = np.random.rand(100, 20)
    gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
    gmm_tr.train(jnt)

    data = np.random.rand(200, 5)
    sddata = np.c_[data, delta(data)]
    gmm_cv = GMMConvertor(n_mix=4, covtype='full', gmmmode=None)
    gmm_cv.open_from_param(gmm_tr.param)

    # Assert per conversion type so the MLPG output is verified too instead
    # of being overwritten by the MMSE output before the check.
    for cvtype in ('mlpg', 'mmse'):
        odata = gmm_cv.convert(sddata, cvtype=cvtype)
        assert data.shape == odata.shape, cvtype
def test_GMM_train_and_convert(self):
    """Train a full-covariance GMM and verify both conversion modes.

    Training data is a random (100, 20) joint matrix with 4 mixtures. The
    conversion input is a random (200, 5) matrix stacked column-wise with
    `delta(data)`. For `mlpg` and for `mmse` the converted output must keep
    the shape of the static input.
    """
    jnt = np.random.rand(100, 20)
    gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
    gmm_tr.train(jnt)

    data = np.random.rand(200, 5)
    sddata = np.c_[data, delta(data)]
    gmm_cv = GMMConvertor(n_mix=4, covtype='full', gmmmode=None)
    gmm_cv.open_from_param(gmm_tr.param)

    # Each mode gets its own assertion; a shared `odata` variable checked
    # once at the end would only ever validate the last conversion.
    for cvtype in ('mlpg', 'mmse'):
        odata = gmm_cv.convert(sddata, cvtype=cvtype)
        assert data.shape == odata.shape, cvtype
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    Every sequence in `org_mceps` is converted with a `GMMConvertor` built
    from `pconf` and loaded with `gmm.param`. Column 0 of each input is
    copied through unchanged; columns from 1 onward are expanded with
    `static_delta`, converted, and re-attached behind column 0.

    Parameters
    ---------
    pconf : PairYAML,
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_covtype` and
        `GMM_mcep_cvtype` are read from it
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums, each indexed as a 2-D array
    gmm : object with a `param` attribute,
        Trained model holder; only `gmm.param` is read and handed to
        `GMMConvertor.open_from_param` (presumably sklearn-based GMM
        parameters -- confirm against the caller)
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    ---------
    cv_mceps : list , shape(`num_mceps`)
        List of converted mel-cepstrums

    Raises
    ---------
    ValueError
        If `gmmmode` is neither `None` nor `diff`. Raised before any model
        is built, including when `org_mceps` is empty.

    """
    # Fail fast: validating here (instead of inside the loop, after a
    # conversion has already run) also covers an empty `org_mceps`.
    if gmmmode is not None and gmmmode != 'diff':
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=gmmmode,
    )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    cv_mceps = []
    for mcep in org_mceps:
        mcep_0th = mcep[:, 0]
        cvmcep = cvgmm.convert(static_delta(mcep[:, sd:]),
                               cvtype=pconf.GMM_mcep_cvtype)
        # np.c_ builds a new array, so the in-place add below does not
        # modify the caller's `mcep`.
        cvmcep = np.c_[mcep_0th, cvmcep]
        if gmmmode == 'diff':
            # In `diff` mode the original coefficients are added back onto
            # the converted ones.
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    A `GMMConvertor` is configured from `pconf`, loaded with `gmm.param`,
    and applied to each entry of `org_mceps`. The 0th column of every input
    is passed through untouched; the remaining columns are run through
    `static_delta` and converted.

    Parameters
    ---------
    pconf : PairYAML,
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_covtype`
        and `GMM_mcep_cvtype`
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums, each indexed as a 2-D array
    gmm : object with a `param` attribute,
        Only `gmm.param` is used; it is passed to
        `GMMConvertor.open_from_param` (presumably sklearn-based GMM
        parameters -- confirm against the caller)
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    ---------
    cv_mceps : list , shape(`num_mceps`)
        List of converted mel-cepstrums

    Raises
    ---------
    ValueError
        If `gmmmode` is neither `None` nor `diff`; checked before the
        convertor is created, so it also fires for an empty `org_mceps`.

    """
    # Reject unsupported modes before doing any work. Checking inside the
    # loop would run one conversion first and skip the check entirely when
    # there is nothing to convert.
    if gmmmode is not None and gmmmode != 'diff':
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=gmmmode,
    )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    cv_mceps = []
    for mcep in org_mceps:
        mcep_0th = mcep[:, 0]
        cvmcep = cvgmm.convert(static_delta(mcep[:, sd:]),
                               cvtype=pconf.GMM_mcep_cvtype)
        # np.c_ allocates a fresh array; the caller's `mcep` stays intact
        # when the differential term is added in place below.
        cvmcep = np.c_[mcep_0th, cvmcep]
        if gmmmode == 'diff':
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
def test_GMM_train_and_convert(self):
    """Exercise GMM training, both conversion modes, and singlepath training.

    Part 1 trains a 4-mixture full-covariance GMM on a random (100, 20)
    joint matrix and converts a random (200, 5) sequence (stacked with
    `delta(data)`) using `mlpg` and `mmse`; each output must match the
    static input's shape.

    Part 2 calls `estimate_responsibility(jnt)` once and then
    `train_singlepath` on two random matrices of different widths (120 and
    140 columns); the resulting `weights_` must be numerically equal.
    """
    jnt = np.random.rand(100, 20)
    gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
    gmm_tr.train(jnt)

    data = np.random.rand(200, 5)
    sddata = np.c_[data, delta(data)]
    gmm_cv = GMMConvertor(n_mix=4, covtype='full', gmmmode=None)
    gmm_cv.open_from_param(gmm_tr.param)

    # Assert per conversion type so the MLPG output is verified too instead
    # of being overwritten by the MMSE output before the check.
    for cvtype in ('mlpg', 'mmse'):
        odata = gmm_cv.convert(sddata, cvtype=cvtype)
        assert data.shape == odata.shape, cvtype

    # test for singlepath
    Ajnt = np.random.rand(100, 120)
    Bjnt = np.random.rand(100, 140)
    gmm_tr.estimate_responsibility(jnt)
    Aparam = gmm_tr.train_singlepath(Ajnt)
    Bparam = gmm_tr.train_singlepath(Bjnt)
    # Both models come from the same responsibility estimate, so the test
    # expects identical mixture weights regardless of feature width.
    assert np.allclose(Aparam.weights_, Bparam.weights_)
def _read_h5_exts(path, *exts):
    """Open the HDF5 file at `path` read-only and return one entry per `ext`.

    The handle is closed even when a read raises.
    """
    h5 = HDF5(path, mode='r')
    try:
        return [h5.read(ext=ext) for ext in exts]
    finally:
        h5.close()


def main(*argv):
    """Convert every utterance in an evaluation list and write the waveforms.

    Parameters
    ---------
    *argv : str
        Command-line style arguments. When empty, `sys.argv[1:]` is used.
        Positional arguments, in order: `org`, `tar`, `org_yml`, `pair_yml`,
        `eval_list_file`, `wav_dir`, `pair_dir`. Optional: `--gmmmode`
        (omit for normal VC, `diff` for differential VC).

    Notes
    ---------
    For each non-empty line `f` of `eval_list_file` the file
    `<wav_dir>/<f>.wav` is analyzed, F0 and mel-cepstrum are converted, a
    GV postfilter is applied, and the result is written to
    `<pair_dir>/test/<f>_VC.wav` (normal) or `<pair_dir>/test/<f>_DIFFVC.wav`
    (`diff`). Each output path is printed.

    Exits through `parser.error` (status 2) when `--gmmmode` is given a
    value that has no synthesis branch.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode', '--gmmmode', type=str, default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str,
                        help='Original speaker')
    parser.add_argument('tar', type=str,
                        help='Target speaker')
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file', type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir', type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # Only the default (None) and 'diff' have a synthesis branch below.
    # Any other value -- including the advertised 'intra' -- used to reach
    # the write step with `wav` undefined and die with a NameError, so it is
    # rejected here before any model or statistics file is touched.
    if args.gmmmode is not None and args.gmmmode != 'diff':
        parser.error(
            'unsupported gmmmode {!r}: omit the option for normal VC or '
            "use 'diff'".format(args.gmmmode))

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE: joblib.load unpickles the file; only load models you trust.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgf0stats, = _read_h5_exts(orgstatspath, 'f0stats')

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarf0stats, targvstats = _read_h5_exts(tarstatspath, 'f0stats', 'gv')

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats, diffcvgvstats = _read_h5_exts(
        cvgvstatspath, 'cvgv', 'diffcvgv')

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            if not f:
                # A blank line would otherwise be looked up as
                # '<wav_dir>/.wav'.
                continue
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed in NumPy 1.24; np.float64 is the dtype
            # that alias always resolved to.
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (column 0 is passed through unconverted)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            else:
                # synthesis DIFFVC w/ GV ('diff' is the only other mode
                # that passes the validation above)
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')

            # write waveform
            # Make sure the directory that will hold `wavpath` exists; the
            # previous code created a directory named after the utterance
            # id itself, leaving one stray empty directory per file.
            os.makedirs(os.path.dirname(wavpath), exist_ok=True)
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)