class ConverterWorker:
    """Worker that turns queued acoustic features into converted waveforms.

    Pipeline: features -> converted features -> post-filtered features ->
    converted speech.
    """

    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        """Build the conversion components from their configuration objects.

        Parameters
        ----------
        feature_queue : Queue
            Queue from which ``(f0, spc, ap, mcep)`` tuples are read.
        converted_queue : Queue
            Queue onto which converted waveforms are put.
        mcep_gmm_config : configs.McepGMMConfig
            Supplies ``n_mix``, ``covtype``, ``param`` and ``cvtype`` for the
            mel-cepstrum GMM converter.
        f0_stats_config : configs.F0StatsConfig
            Supplies ``source_stats`` and ``target_stats`` for F0 conversion.
        gv_config : configs.GVConfig
            Supplies ``target_stats``, ``cvgv_stats`` and ``morph_coeff`` for
            the GV post-filter.
        synthesizer_config : configs.SynthesizerConfig
            Supplies ``fs``, ``fftl``, ``shiftms`` and ``mcep_alpha`` for the
            synthesizer.
        """
        self._mcep_gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                                      covtype=mcep_gmm_config.covtype,
                                      gmmmode=None)
        self._mcep_gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm_config = mcep_gmm_config

        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        self._synthesizer = Synthesizer(fs=synthesizer_config.fs,
                                        fftl=synthesizer_config.fftl,
                                        shiftms=synthesizer_config.shiftms)
        self._synthesizer_config = synthesizer_config

    def convert_from_feature(self, f0, spc, ap, mcep) -> numpy.ndarray:
        """Convert one set of features and synthesize a 16-bit waveform.

        Parameters
        ----------
        f0 : array
            F0 sequence; converted with the configured source/target stats.
        spc : array
            Not used by this method; accepted so the whole feature tuple can
            be passed through unchanged.
        ap : array
            Passed to the synthesizer as-is.
        mcep : array
            Two-dimensional feature array. Column 0 is kept unconverted and
            columns ``1:`` are converted by the GMM.

        Returns
        -------
        numpy.ndarray
            Synthesized signal clipped to ``[-32768, 32767]`` and cast to
            ``int16``.
        """
        cv_f0 = self._f0_stats.convert(f0,
                                       self._f0_stats_config.source_stats,
                                       self._f0_stats_config.target_stats)

        # Column 0 is excluded from the GMM conversion and re-attached
        # afterwards (presumably the power coefficient, judging by the
        # "wopow" naming -- confirm against the feature extractor).
        cv_mcep_wopow = self._mcep_gmm.convert(
            static_delta(mcep[:, 1:]),
            cvtype=self._mcep_gmm_config.cvtype)
        cv_mcep = numpy.c_[mcep[:, 0], cv_mcep_wopow]

        cv_mcep_wGV = self._mcep_gv.postfilter(
            cv_mcep,
            self._mcep_gv_config.target_stats,
            cvgvstats=self._mcep_gv_config.cvgv_stats,
            alpha=self._mcep_gv_config.morph_coeff,
            startdim=1)

        output_wav = self._synthesizer.synthesis(
            cv_f0,
            cv_mcep_wGV,
            ap,
            rmcep=mcep,
            alpha=self._synthesizer_config.mcep_alpha)

        # Use the public ``numpy.int16`` alias: ``numpy.core`` is a private
        # package and accessing it is deprecated since NumPy 2.0.
        return output_wav.clip(-32768, 32767).astype(numpy.int16)

    def start(self):
        """Run the conversion loop forever.

        Each cycle takes one feature tuple from the feature queue, converts
        it, and puts the waveform on the converted queue. This method never
        returns.
        """
        while True:
            feature = self._feature_queue.get()  # synchronous processing
            f0, spc, ap, mcep = feature
            output_wav = self.convert_from_feature(f0, spc, ap, mcep)
            self._converted_queue.put(output_wav)
def main(*argv):
    """Estimate joint feature vectors for a source/target speaker pair.

    The first iteration aligns the source and target mel-cepstra directly.
    Every later iteration trains a GMM on the current joint features,
    converts the source mel-cepstra with it, and re-aligns using the
    converted features. After the final iteration the joint features, the
    last trained GMM parameters and the per-file time warping functions are
    written under ``pair_dir`` (``jnt/``, ``GMM/`` and ``twf/``).

    Parameters
    ----------
    *argv : str
        Command-line style arguments ``pair_yml org_list_file tar_list_file
        pair_dir``. When empty, ``sys.argv[1:]`` is used.

    Raises
    ------
    ValueError
        If the list files yield no feature files.
    """
    argv = argv if argv else sys.argv[1:]
    # Command-line options
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    itnum = 1
    sd = 1  # start dimension for alignment of mcep
    num_files = len(org_mceps)
    if num_files == 0:
        # Without this check the failure would surface later as an
        # unbound-variable error that hides the real cause.
        raise ValueError('no feature files were read from the list files')
    print('{}-th joint feature extraction starts.'.format(itnum))

    # first iteration
    # Collect the per-file blocks and concatenate once instead of growing
    # the array with np.r_ on every file (which re-copies all earlier rows).
    jdata_list = []
    for i in range(num_files):
        jdata, _, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                              org_npows[i],
                                              tar_mceps[i][:, sd:],
                                              tar_npows[i])
        print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jdata_list.append(jdata)
    jnt = np.concatenate(jdata_list, axis=0)
    itnum += 1

    # Stay defined even when pconf.jnt_n_iter <= 1 and the refinement loop
    # below never runs.
    trgmm = None
    twfs = []

    # second through final iteration
    while itnum < pconf.jnt_n_iter + 1:
        print('{}-th joint feature extraction starts.'.format(itnum))
        # train GMM
        trgmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                           n_iter=pconf.GMM_mcep_n_iter,
                           covtype=pconf.GMM_mcep_covtype)
        trgmm.train(jnt)
        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(trgmm.param)

        twfs = []
        jdata_list = []
        for i in range(num_files):
            cvmcep = cvgmm.convert(static_delta(org_mceps[i][:, sd:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                    org_npows[i],
                                                    tar_mceps[i][:, sd:],
                                                    tar_npows[i],
                                                    cvdata=cvmcep)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
            jdata_list.append(jdata)
            twfs.append(twf)
        jnt = np.concatenate(jdata_list, axis=0)

        itnum += 1

    # save joint feature vector
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, 'it' + str(itnum) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    try:
        jnth5.save(jnt, ext='jnt')
    finally:
        # release the handle even if saving fails
        jnth5.close()

    # save GMM (only exists if at least one refinement iteration ran)
    if trgmm is not None:
        gmm_dir = os.path.join(args.pair_dir, 'GMM')
        os.makedirs(gmm_dir, exist_ok=True)
        gmmpath = os.path.join(gmm_dir, 'it' + str(itnum) + '_gmm.pkl')
        joblib.dump(trgmm.param, gmmpath)

    # save twf
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        # one list-file line per utterance, paired with its warping function
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(twf_dir,
                                   'it' + str(itnum) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            try:
                twfh5.save(twf, ext='twf')
            finally:
                twfh5.close()
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    The first iteration aligns the original and target features directly
    (or with ``given_twfs`` when supplied). Each later iteration trains a
    GMM on the joint features of the previous iteration, converts the
    original features with it, and aligns again using the converted data.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Per-file alignments handed to `get_alignment` as `given_twf`
        during the 1st iteration only
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors from the final iteration
    twfs : list,
        List of time warping functions from the final iteration

    Both lists are empty when `itnum` is less than 1.
    """
    num_files = len(odata)
    cvgmm, cvdata = None, None
    # Bound up front so that itnum < 1 returns empty lists instead of
    # failing on unbound names at the return statement.
    jfvs, twfs = [], []

    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs, jfvs = [], []
        for i in range(num_files):
            # a caller-supplied alignment is only used in the first pass
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None

            # from the second pass on, align against GMM-converted features
            if it > 1:
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)

            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if it != itnum:
            # train GMM, if not final iteration; the stacked joint data is
            # only needed here, so it is not built on the last pass
            jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)

    return jfvs, twfs
def align_ppg_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                              s_list_file, tar_list_file,
                              opow=-100, tpow=-100, itnum=3, sd=0,
                              given_twfs=None, otflag=None):
    """Build joint feature vectors via PPG alignment and refine the twfs

    Joint feature vectors are produced once, during the first iteration, by
    `get_ppg_alignment`. Time warping functions are recomputed in every
    iteration by `get_alignment`; from the second iteration on it is given
    the original features converted by a GMM trained on the joint data.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    s_list_file : file, str, or list of str
        Source list, read with `np.loadtxt`; entry `i` is passed to
        `get_ppg_alignment` together with file `i`
    tar_list_file : file, str, or list of str
        Target list, read and used the same way as `s_list_file`
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Per-file alignments passed as `given_twf` during the 1st iteration
        Default set to None
    otflag : str, optional
        Forwarded unchanged to the alignment functions
        Default set to None

    Returns
    -------
    jfvs : list,
        Joint feature vectors from the first iteration
    twfs : list,
        Time warping functions from the final iteration

    Both lists are empty when `itnum` is less than 1.
    """
    # ndmin=1 keeps a list file with a single entry indexable; without it
    # np.loadtxt returns a 0-d array and `entries[i]` raises IndexError.
    s_entries = np.loadtxt(s_list_file, dtype='str', ndmin=1)
    tar_entries = np.loadtxt(tar_list_file, dtype='str', ndmin=1)

    num_files = len(odata)
    cvgmm, cvdata = None, None
    jnt_data = None
    jfvs = []
    # Bound up front so that itnum < 1 returns an empty list instead of
    # failing on an unbound name at the return statement.
    twfs = []

    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs = []
        for i in range(num_files):
            # a caller-supplied alignment is only used in the first pass
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None

            if it > 1:
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)

            if it == 1:
                # joint features are only built in the first pass
                jdata = get_ppg_alignment(odata[i],
                                          onpows[i],
                                          tdata[i],
                                          tnpows[i],
                                          s_entries[i],
                                          tar_entries[i],
                                          opow=opow,
                                          tpow=tpow,
                                          sd=sd,
                                          cvdata=cvdata,
                                          given_twf=gtwf,
                                          otflag=otflag)
                print(s_entries[i])
                jfvs.append(jdata)

            _, twf, _ = get_alignment(odata[i],
                                      onpows[i],
                                      tdata[i],
                                      tnpows[i],
                                      opow=opow,
                                      tpow=tpow,
                                      sd=sd,
                                      cvdata=cvdata,
                                      given_twf=gtwf,
                                      otflag=otflag)
            twfs.append(twf)

        if it != itnum:
            # train GMM, if not final iteration
            if it == 1:
                # jfvs never changes after the first pass, so stack it once
                # here rather than after every file
                jnt_data = transform_jnt(jfvs)
            print("training GMM")
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)

    return jfvs, twfs
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    Alignment is repeated `itnum` times. Between iterations a GMM is
    trained on the joint features just obtained and used to convert the
    original features, which are then supplied to the next alignment.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Alignments, one per file, used while 1st iteration
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list, (`num_files`)
        Joint feature vectors of the last iteration, one per file
    twfs : list, (`num_files`)
        Time warping functions of the last iteration, one per file

    Both lists are empty when `itnum` is less than 1.
    """
    num_files = len(odata)
    cvgmm, cvdata = None, None
    # Defined before the loop so that a non-positive itnum yields empty
    # results rather than an unbound-name error on return.
    jfvs, twfs = [], []

    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        first_pass = it == 1
        last_pass = it == itnum

        # alignment
        twfs, jfvs = [], []
        for i in range(num_files):
            # externally supplied alignments only apply to the first pass
            gtwf = None
            if first_pass and given_twfs is not None:
                gtwf = given_twfs[i]

            # later passes align with the GMM-converted original features
            if not first_pass:
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)

            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if last_pass:
            # no further alignment follows, so no GMM (and no stacked joint
            # data) is needed
            continue

        # train GMM, if not final iteration
        jnt_data = transform_jnt(jfvs)
        datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                             n_iter=pconf.GMM_mcep_n_iter,
                             covtype=pconf.GMM_mcep_covtype)
        datagmm.train(jnt_data)
        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(datagmm.param)

    return jfvs, twfs