예제 #1
0
    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        """Store the queues and configs and build the conversion components.

        Args:
            feature_queue: Queue kept as ``self._feature_queue``.
            converted_queue: Queue kept as ``self._converted_queue``.
            mcep_gmm_config: Supplies ``n_mix``, ``covtype`` and ``param``
                for the mel-cepstrum GMM converter.
            f0_stats_config: Kept as-is for later use with the F0 statistics
                object.
            gv_config: Kept as-is for later use with the GV object.
            synthesizer_config: Supplies ``fs``, ``fftl`` and ``shiftms`` for
                the synthesizer.
        """
        # GMM converter: construct it, then load its parameters from the
        # config before exposing it on the instance.
        gmm = GMMConvertor(
            n_mix=mcep_gmm_config.n_mix,
            covtype=mcep_gmm_config.covtype,
            gmmmode=None,
        )
        gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm = gmm
        self._mcep_gmm_config = mcep_gmm_config

        # Input / output queues.
        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        # F0 statistics helper and its config.
        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        # GV helper and its config.
        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        # Waveform synthesizer and its config.
        synthesizer = Synthesizer(
            fs=synthesizer_config.fs,
            fftl=synthesizer_config.fftl,
            shiftms=synthesizer_config.shiftms,
        )
        self._synthesizer = synthesizer
        self._synthesizer_config = synthesizer_config
예제 #2
0
class ConverterWorker:
    '''
    Features -> converted features -> post-filtered features -> converted
    speech.

    Pulls feature tuples from an input queue, converts them and pushes the
    resulting waveform onto an output queue.
    '''
    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        '''Store the queues and configs and build the conversion components.

        Args:
            feature_queue: Queue that ``start`` reads feature tuples from.
            converted_queue: Queue that ``start`` writes waveforms to.
            mcep_gmm_config: Supplies ``n_mix``, ``covtype``, ``param`` and
                ``cvtype`` for the mel-cepstrum GMM converter.
            f0_stats_config: Supplies ``source_stats`` / ``target_stats``
                for F0 conversion.
            gv_config: Supplies ``target_stats``, ``cvgv_stats`` and
                ``morph_coeff`` for the GV post-filter.
            synthesizer_config: Supplies ``fs``, ``fftl``, ``shiftms`` and
                ``mcep_alpha`` for the synthesizer.
        '''
        self._mcep_gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                                      covtype=mcep_gmm_config.covtype,
                                      gmmmode=None)
        self._mcep_gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm_config = mcep_gmm_config

        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        self._synthesizer = Synthesizer(fs=synthesizer_config.fs,
                                        fftl=synthesizer_config.fftl,
                                        shiftms=synthesizer_config.shiftms)
        self._synthesizer_config = synthesizer_config

    def convert_from_feature(self, f0, spc, ap, mcep) -> numpy.ndarray:
        '''Convert one set of source features into a 16-bit waveform.

        Args:
            f0: F0 input handed to the F0 statistics converter.
            spc: Not used by this method; accepted so the full feature tuple
                can be passed through unchanged.
            ap: Passed straight to the synthesizer.
            mcep: 2-D array; column 0 is carried over unchanged, columns 1..
                are converted by the GMM.

        Returns:
            The synthesized waveform clipped to the int16 range and cast to
            ``numpy.int16``.
        '''
        cv_f0 = self._f0_stats.convert(f0, self._f0_stats_config.source_stats,
                                       self._f0_stats_config.target_stats)

        # Convert everything except the 0th coefficient, then re-attach the
        # original 0th coefficient in front of the converted ones.
        cv_mcep_wopow = self._mcep_gmm.convert(
            static_delta(mcep[:, 1:]), cvtype=self._mcep_gmm_config.cvtype)
        cv_mcep = numpy.c_[mcep[:, 0], cv_mcep_wopow]

        # GV post-filter, starting from dimension 1.
        cv_mcep_wGV = self._mcep_gv.postfilter(
            cv_mcep,
            self._mcep_gv_config.target_stats,
            cvgvstats=self._mcep_gv_config.cvgv_stats,
            alpha=self._mcep_gv_config.morph_coeff,
            startdim=1)

        output_wav = self._synthesizer.synthesis(
            cv_f0,
            cv_mcep_wGV,
            ap,
            rmcep=mcep,
            alpha=self._synthesizer_config.mcep_alpha)
        # Use the public dtype: ``numpy.core`` is a private namespace that
        # was renamed to ``numpy._core`` in NumPy 2.0.
        return output_wav.clip(-32768, 32767).astype(numpy.int16)

    def start(self):
        '''Run the conversion loop forever.

        Each iteration takes one ``(f0, spc, ap, mcep)`` tuple from the
        feature queue, converts it and puts the waveform on the converted
        queue. The loop has no exit condition.
        '''
        while True:
            feature = self._feature_queue.get()  # synchronous processing
            f0, spc, ap, mcep = feature
            output_wav = self.convert_from_feature(f0, spc, ap, mcep)
            self._converted_queue.put(output_wav)
예제 #3
0
def main(*argv):
    """Estimate joint feature vectors of a source and a target speaker.

    Reads mcep / npow features for both speakers, aligns them file by file,
    and refines the alignment over ``pconf.jnt_n_iter`` iterations by
    training a GMM on the joint data and re-aligning with the converted
    features. The final joint features, GMM parameters (when at least one
    GMM was trained) and per-file time warping functions are written under
    ``pair_dir``.

    Parameters
    ----------
    *argv : str
        Command line arguments (``pair_yml org_list_file tar_list_file
        pair_dir``). ``sys.argv[1:]`` is used when none are given.

    Raises
    ------
    ValueError
        If the list files yield no feature files.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file',
                        type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file',
                        type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    itnum = 1
    sd = 1  # start dimension for alignment of mcep
    num_files = len(org_mceps)
    if num_files == 0:
        raise ValueError('no feature files found for the given list files')
    print('{}-th joint feature extraction starts.'.format(itnum))

    # No GMM exists until the second iteration; stays None if
    # pconf.jnt_n_iter == 1 so the save step below can skip it.
    trgmm = None

    # first iteration
    jdatas, twfs = [], []
    for i in range(num_files):
        jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                org_npows[i],
                                                tar_mceps[i][:, sd:],
                                                tar_npows[i])
        print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jdatas.append(jdata)
        # Keep the warping functions so they can still be saved when only
        # one iteration is requested.
        twfs.append(twf)
    # Concatenate once instead of growing the array file by file.
    jnt = np.concatenate(jdatas, axis=0)
    itnum += 1

    # second through final iteration
    while itnum < pconf.jnt_n_iter + 1:
        print('{}-th joint feature extraction starts.'.format(itnum))
        # train GMM
        trgmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                           n_iter=pconf.GMM_mcep_n_iter,
                           covtype=pconf.GMM_mcep_covtype)
        trgmm.train(jnt)

        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(trgmm.param)
        jdatas, twfs = [], []
        for i in range(num_files):
            cvmcep = cvgmm.convert(static_delta(org_mceps[i][:, sd:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                    org_npows[i],
                                                    tar_mceps[i][:, sd:],
                                                    tar_npows[i],
                                                    cvdata=cvmcep)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
            jdatas.append(jdata)
            twfs.append(twf)
        jnt = np.concatenate(jdatas, axis=0)

        itnum += 1

    # NOTE: at this point itnum is one past the last iteration performed;
    # the output file names below intentionally keep using that value.

    # save joint feature vector
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, 'it' + str(itnum) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    try:
        jnth5.save(jnt, ext='jnt')
    finally:
        jnth5.close()

    # save GMM (only if one was trained, i.e. more than one iteration ran)
    if trgmm is not None:
        gmm_dir = os.path.join(args.pair_dir, 'GMM')
        os.makedirs(gmm_dir, exist_ok=True)
        gmmpath = os.path.join(gmm_dir, 'it' + str(itnum) + '_gmm.pkl')
        joblib.dump(trgmm.param, gmmpath)

    # save twf
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(twf_dir,
                                   'it' + str(itnum) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            try:
                twfh5.save(twf, ext='twf')
            finally:
                twfh5.close()
예제 #4
0
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    The alignment is repeated `itnum` times. After every iteration except
    the last one, a GMM is trained on the joint data and used to convert
    the original features for the next alignment.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Per-file alignments to use while 1st iteration
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors from the final iteration
        (empty when `itnum` < 1)
    twfs : list,
        List of time warping functions from the final iteration
        (empty when `itnum` < 1)
    """
    num_files = len(odata)
    cvgmm, cvdata = None, None
    # Bound up front so that `itnum` < 1 returns empty lists instead of
    # failing on unbound names at the return statement.
    jfvs, twfs = [], []
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        # alignment
        twfs, jfvs = [], []
        for i in range(num_files):
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None
            if it > 1:
                # From the 2nd iteration on, align using features converted
                # by the GMM trained in the previous iteration.
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if it != itnum:
            # train GMM, if not final iteration; the stacked joint data is
            # only needed here, so it is not built on the last pass.
            jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
def align_ppg_feature_vectors(odata,
                              onpows,
                              tdata,
                              tnpows,
                              pconf,
                              s_list_file,
                              tar_list_file,
                              opow=-100,
                              tpow=-100,
                              itnum=3,
                              sd=0,
                              given_twfs=None,
                              otflag=None):
    """Get PPG-based joint feature vectors and time warping functions

    Joint feature vectors are produced only in the 1st iteration, by
    `get_ppg_alignment`. Time warping functions are recomputed with
    `get_alignment` in every iteration, using GMM-converted features from
    the 2nd iteration on.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    s_list_file : str or file-like
        List file of the source speaker, read with `np.loadtxt`; its i-th
        entry is passed to `get_ppg_alignment` for the i-th file
    tar_list_file : str or file-like
        List file of the target speaker, read the same way
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Per-file alignments to use while 1st iteration
        Default set to None
    otflag : str, optional
        Passed through to the alignment functions
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors from the 1st iteration
    twfs : list,
        List of time warping functions from the final iteration
        (empty when `itnum` < 1)
    """
    # ndmin=1 keeps the result indexable when a list file has a single
    # line (np.loadtxt would otherwise return a 0-d array).
    src_entries = np.loadtxt(s_list_file, dtype='str', ndmin=1)
    tar_entries = np.loadtxt(tar_list_file, dtype='str', ndmin=1)
    num_files = len(odata)
    cvgmm, cvdata = None, None
    jnt_data = None
    # Bound up front so that `itnum` < 1 returns empty lists.
    jfvs, twfs = [], []
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs = []
        for i in range(num_files):
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None
            if it > 1:
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            if it == 1:
                jdata = get_ppg_alignment(odata[i],
                                          onpows[i],
                                          tdata[i],
                                          tnpows[i],
                                          src_entries[i],
                                          tar_entries[i],
                                          opow=opow,
                                          tpow=tpow,
                                          sd=sd,
                                          cvdata=cvdata,
                                          given_twf=gtwf,
                                          otflag=otflag)
                print(src_entries[i])
                jfvs.append(jdata)
            _, twf, _ = get_alignment(odata[i],
                                      onpows[i],
                                      tdata[i],
                                      tnpows[i],
                                      opow=opow,
                                      tpow=tpow,
                                      sd=sd,
                                      cvdata=cvdata,
                                      given_twf=gtwf,
                                      otflag=otflag)
            twfs.append(twf)

        if it != itnum:
            # train GMM, if not final iteration
            print("training GMM")
            if jnt_data is None:
                # jfvs is only filled in the 1st iteration, so the stacked
                # joint data is built once and reused afterwards.
                jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    The alignment is repeated `itnum` times. After every iteration except
    the last one, a GMM is trained on the joint data and used to convert
    the original features for the next alignment.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration providing `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype`
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, (`num_files`), optional
        Per-file alignments to use while 1st iteration
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors from the final iteration
        (empty when `itnum` < 1)
    twfs : list,
        List of time warping functions from the final iteration
        (empty when `itnum` < 1)

    """
    num_files = len(odata)
    cvgmm, cvdata = None, None
    # Bound up front so that `itnum` < 1 returns empty lists instead of
    # failing on unbound names at the return statement.
    jfvs, twfs = [], []
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        # alignment
        twfs, jfvs = [], []
        for i in range(num_files):
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None
            if it > 1:
                # From the 2nd iteration on, align using features converted
                # by the GMM trained in the previous iteration.
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if it != itnum:
            # train GMM, if not final iteration; the stacked joint data is
            # only needed here, so it is not built on the last pass.
            jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs