def main(*argv):
    """Train a GMM on the joint mcep feature vector and save converted GV statistics.

    Command-line entry point.  Reads the joint feature vector produced by the
    alignment step, trains a mcep GMM, converts the original speaker's mceps
    with it (normal and differential modes), and stores the GV statistics of
    the converted features under ``pair_dir/model``.
    """
    argv = argv if argv else sys.argv[1:]

    # Options for python
    description = 'Train GMM and converted GV statistics'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read joint feature vector
    jntf = os.path.join(args.pair_dir, 'jnt',
                        'it' + str(pconf.jnt_n_iter + 1) + '_jnt.h5')
    jnth5 = HDF5(jntf, mode='r')
    jnt = jnth5.read(ext='jnt')
    # fix: the handle was previously left open (sibling scripts close theirs)
    jnth5.close()

    # train GMM for mcep using joint feature vector
    gmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                     n_iter=pconf.GMM_mcep_n_iter,
                     covtype=pconf.GMM_mcep_covtype)
    gmm.train(jnt)

    # save GMM; exist_ok avoids the check-then-create race of the old
    # `if not os.path.exists(...)` pattern
    gmm_dir = os.path.join(args.pair_dir, 'model')
    os.makedirs(gmm_dir, exist_ok=True)
    gmmpath = os.path.join(gmm_dir, 'GMM.pkl')
    joblib.dump(gmm.param, gmmpath)
    print("Conversion model save into " + gmmpath)

    # calculate GV statistics of converted feature
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    cv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode=None)
    diffcv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode='diff')
    gv = GV()
    cvgvstats = gv.estimate(cv_mceps)
    diffcvgvstats = gv.estimate(diffcv_mceps)

    # save GV statistics of both normal and differential conversion
    statspath = os.path.join(gmm_dir, 'cvgv.h5')
    cvgvh5 = HDF5(statspath, mode='w')
    cvgvh5.save(cvgvstats, ext='cvgv')
    cvgvh5.save(diffcvgvstats, ext='diffcvgv')
    # fix: flush and release the output handle
    cvgvh5.close()
    print("Converted gvstats save into " + statspath)
def main(*argv):
    """Iteratively estimate the joint mcep feature vector of a speaker pair.

    Command-line entry point.  The first pass DTW-aligns the raw source and
    target mceps; each subsequent pass trains a GMM on the current joint
    vector and re-aligns against the GMM-converted mceps.  Saves the joint
    feature vector, the trained GMM, and the per-utterance time-warping
    functions (twf) under ``pair_dir``.
    """
    argv = argv if argv else sys.argv[1:]

    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    itnum = 1
    sd = 1  # start dimension for alignment of mcep (skip the 0-th dimension)
    num_files = len(org_mceps)
    print('{}-th joint feature extraction starts.'.format(itnum))

    # first iteration: align raw features (no converted mceps available yet)
    jdata_list = []
    for i in range(num_files):
        jdata, _, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                              org_npows[i],
                                              tar_mceps[i][:, sd:],
                                              tar_npows[i])
        print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jdata_list.append(jdata)
    # fix: concatenate once instead of repeated np.r_ growth (quadratic copying)
    jnt = np.concatenate(jdata_list, axis=0)
    itnum += 1

    # second through final iteration
    # NOTE(review): assumes pconf.jnt_n_iter >= 2; with a smaller value the
    # loop never runs and `trgmm`/`twfs` below would be undefined -- confirm
    # against the pair-config schema.
    while itnum < pconf.jnt_n_iter + 1:
        print('{}-th joint feature extraction starts.'.format(itnum))
        # train GMM on the current joint vector
        trgmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                           n_iter=pconf.GMM_mcep_n_iter,
                           covtype=pconf.GMM_mcep_covtype)
        trgmm.train(jnt)
        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(trgmm.param)

        # re-align each utterance against its GMM-converted mcep
        twfs = []
        jdata_list = []
        for i in range(num_files):
            cvmcep = cvgmm.convert(static_delta(org_mceps[i][:, sd:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                    org_npows[i],
                                                    tar_mceps[i][:, sd:],
                                                    tar_npows[i],
                                                    cvdata=cvmcep)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
            jdata_list.append(jdata)
            twfs.append(twf)
        jnt = np.concatenate(jdata_list, axis=0)
        itnum += 1

    # save joint feature vector
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, 'it' + str(itnum) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    jnth5.save(jnt, ext='jnt')
    jnth5.close()

    # save GMM
    gmm_dir = os.path.join(args.pair_dir, 'GMM')
    os.makedirs(gmm_dir, exist_ok=True)
    gmmpath = os.path.join(gmm_dir, 'it' + str(itnum) + '_gmm.pkl')
    joblib.dump(trgmm.param, gmmpath)

    # save twf, one h5 file per utterance in the original list file
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(
                twf_dir, 'it' + str(itnum) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            twfh5.save(twf, ext='twf')
            twfh5.close()
def main(*argv):
    """Build joint mcep and codeap feature vectors for a speaker pair.

    Command-line entry point.  DTW-aligns source and target mceps (skipping
    the 0-th dimension and silent frames), then reuses the resulting
    time-warping functions to build the codeap joint vector.  Joint vectors
    and per-utterance twfs are written under ``pair_dir``.
    """
    argv = argv if argv else sys.argv[1:]

    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('tar_yml', type=str,
                        help='Yml file of the target speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # speaker-dependent and pair-dependent configurations
    oconf = SpeakerYML(args.org_yml)
    tconf = SpeakerYML(args.tar_yml)
    pconf = PairYML(args.pair_yml)

    # load per-utterance features of both speakers from the HDF files
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    # dtw between original and target w/o 0th and silence
    print('## Alignment mcep w/o 0-th and silence ##')
    jmceps, twfs = align_feature_vectors(org_mceps,
                                         org_npows,
                                         tar_mceps,
                                         tar_npows,
                                         pconf,
                                         opow=oconf.power_threshold,
                                         tpow=tconf.power_threshold,
                                         itnum=pconf.jnt_n_iter,
                                         sd=1,
                                         )
    jnt_mcep = transform_jnt(jmceps)

    # reuse the mcep twfs to align codeap, utterance by utterance
    print('## Alignment codeap using given twf ##')
    org_codeaps = read_feats(args.org_list_file, h5_dir, ext='codeap')
    tar_codeaps = read_feats(args.tar_list_file, h5_dir, ext='codeap')
    jcodeaps = []
    for ocap, onpow, tcap, tnpow, twf in zip(
            org_codeaps, org_npows, tar_codeaps, tar_npows, twfs):
        # extract codeap joint feature vector for this utterance
        jcodeap, _, _ = get_alignment(ocap,
                                      onpow,
                                      tcap,
                                      tnpow,
                                      opow=oconf.power_threshold,
                                      tpow=tconf.power_threshold,
                                      given_twf=twf)
        jcodeaps.append(jcodeap)
    jnt_codeap = transform_jnt(jcodeaps)

    # persist both joint feature vectors in one h5 file
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir,
                           'it' + str(pconf.jnt_n_iter) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='a')
    jnth5.save(jnt_mcep, ext='mcep')
    jnth5.save(jnt_codeap, ext='codeap')
    jnth5.close()

    # persist time-warping functions, one h5 file per utterance
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(
                twf_dir, 'it' + str(pconf.jnt_n_iter) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='a')
            twfh5.save(twf, ext='twf')
            twfh5.close()