def main():
    """Align source/target mel-spectrograms and pickle the joint features.

    Parses the command line, loads the objects stored at ``--melspec`` and
    ``--npow`` with ``pickle_read`` (both are indexed with the keys ``'src'``
    and ``'tgt'``), runs ``align_feature_vectors`` on them and writes
    ``jnt_melspc.pkl`` and ``twf.pkl`` into ``--out_dir``.
    """
    parser = argparse.ArgumentParser(
        description='estimate the DTW function and the joint features')
    parser.add_argument('--melspec', help='Melspec data directory')
    parser.add_argument('--npow', help='Npow speaker data directory')
    parser.add_argument('--out_dir', help='Output directory')
    # Explicit ``type=`` converters: without them a value given on the
    # command line stays a string while the default is a number.
    parser.add_argument('--s_npow_th',
                        type=float,
                        default=-20.,
                        help='Source speaker npow threshold')
    parser.add_argument('--t_npow_th',
                        type=float,
                        default=-15.,
                        help='Target speaker npow threshold')
    parser.add_argument('--n_mix',
                        type=int,
                        default=32,
                        help='Number of mixture components')
    parser.add_argument('--n_iter',
                        type=int,
                        default=100,
                        help='Number of iterations to get GMM model')
    parser.add_argument('--covtype',
                        default='full',
                        help='GMM covariance type')
    parser.add_argument('--cvtype', default='mlpg', help='conversion mode')
    parser.add_argument('--jnt_n_iter',
                        type=int,
                        default=3,
                        help='Number of iterations to get joint features')
    conf = parser.parse_args()

    melspec_dict = pickle_read(conf.melspec)
    npow_dict = pickle_read(conf.npow)

    print('## Alignment mlespec ##')
    # NOTE(review): ``conf`` is passed as ``pconf``. The
    # ``align_feature_vectors`` defined in this file reads
    # ``pconf.GMM_mcep_*`` attributes, which this parser does not define --
    # confirm which implementation this entry point is meant to call.
    jmelspc_lst, twfs = align_feature_vectors(odata=melspec_dict['src'],
                                              onpows=npow_dict['src'],
                                              tdata=melspec_dict['tgt'],
                                              tnpows=npow_dict['tgt'],
                                              pconf=conf,
                                              opow=conf.s_npow_th,
                                              tpow=conf.t_npow_th,
                                              itnum=conf.jnt_n_iter,
                                              sd=0)
    jnt_melspec = transform_jnt(jmelspc_lst)

    if not os.path.isdir(conf.out_dir):
        print('Output directory {} does not exist, create one'.format(
            conf.out_dir))
        # The message above promises creation; actually do it so the saves
        # below have a directory to write into.
        os.makedirs(conf.out_dir, exist_ok=True)

    print('Save jnt_melspec and twfs to {}'.format(conf.out_dir))
    pickle_save(jnt_melspec, os.path.join(conf.out_dir, 'jnt_melspc.pkl'))
    pickle_save(twfs, os.path.join(conf.out_dir, 'twf.pkl'))
    print('Done!')
Esempio n. 2
0
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    The alignment is repeated `itnum` times. After every iteration except
    the last one a GMM is trained on the joint features; from the second
    iteration on, the original features converted by that GMM are handed
    to ``get_alignment`` as ``cvdata``.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Configuration providing the attributes ``GMM_mcep_n_mix``,
        ``GMM_mcep_n_iter``, ``GMM_mcep_covtype`` and ``GMM_mcep_cvtype``
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, optional, (`num_files`)
        Per-file alignments passed to ``get_alignment`` as ``given_twf``
        during the 1st iteration only
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors of the last iteration
        (empty when `itnum` < 1)
    twfs : list,
        List of time warping functions of the last iteration
        (empty when `itnum` < 1)
    """
    # Bound up front so that ``itnum < 1`` returns empty lists instead of
    # raising UnboundLocalError at the final ``return``.
    twfs, jfvs = [], []
    cvgmm = None
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs, jfvs = [], []
        use_given = it == 1 and given_twfs is not None
        for i, ofeat in enumerate(odata):
            gtwf = given_twfs[i] if use_given else None
            cvdata = None
            if it > 1:
                # From the 2nd iteration on, align against the features
                # converted by the GMM trained in the previous iteration.
                cvdata = cvgmm.convert(static_delta(ofeat[:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(ofeat,
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if it != itnum:
            # train GMM, if not final iteration; the stacked joint data is
            # only needed here, so it is not built on the last pass.
            jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
Esempio n. 3
0
def main(*argv):
    """Estimate and store joint features of an original/target speaker pair.

    Reads ``mcep``, ``npow`` and ``codeap`` features for both list files
    from ``<pair_dir>/h5``, aligns the mceps with ``align_feature_vectors``
    and reuses the resulting time warping functions to align the codeaps.
    The joint features are saved to ``<pair_dir>/jnt/it<N>_jnt.h5`` and one
    time warping function per line of the original list file is saved to
    ``<pair_dir>/twf/it<N>_<basename>.h5``, where ``N`` is
    ``pconf.jnt_n_iter``.

    Parameters
    ----------
    *argv : str
        Command line arguments; ``sys.argv[1:]`` is used when none are given
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('tar_yml', type=str,
                        help='Yml file of the target speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read speaker-dependent yml files
    oconf = SpeakerYML(args.org_yml)
    tconf = SpeakerYML(args.tar_yml)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    # dtw between original and target w/o 0th and silence
    print('## Alignment mcep w/o 0-th and silence ##')
    jmceps, twfs = align_feature_vectors(org_mceps,
                                         org_npows,
                                         tar_mceps,
                                         tar_npows,
                                         pconf,
                                         opow=oconf.power_threshold,
                                         tpow=tconf.power_threshold,
                                         itnum=pconf.jnt_n_iter,
                                         sd=1,
                                         )
    jnt_mcep = transform_jnt(jmceps)

    # create joint feature for codeap using given twfs
    print('## Alignment codeap using given twf ##')
    org_codeaps = read_feats(args.org_list_file, h5_dir, ext='codeap')
    tar_codeaps = read_feats(args.tar_list_file, h5_dir, ext='codeap')
    jcodeaps = []
    for i, org_codeap in enumerate(org_codeaps):
        # extract codeap joint feature vector
        jcodeap, _, _ = get_alignment(org_codeap,
                                      org_npows[i],
                                      tar_codeaps[i],
                                      tar_npows[i],
                                      opow=oconf.power_threshold,
                                      tpow=tconf.power_threshold,
                                      given_twf=twfs[i])
        jcodeaps.append(jcodeap)
    jnt_codeap = transform_jnt(jcodeaps)

    # prefix shared by every output file name
    it_prefix = 'it' + str(pconf.jnt_n_iter)

    # save joint feature vectors
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, it_prefix + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='a')
    try:
        jnth5.save(jnt_mcep, ext='mcep')
        jnth5.save(jnt_codeap, ext='codeap')
    finally:
        # close the handle even if one of the saves raises
        jnth5.close()

    # save twfs
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        # one twf per line of the original list file, in file order
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(twf_dir, it_prefix + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='a')
            try:
                twfh5.save(twf, ext='twf')
            finally:
                twfh5.close()
def align_ppg_feature_vectors(odata,
                              onpows,
                              tdata,
                              tnpows,
                              pconf,
                              s_list_file,
                              tar_list_file,
                              opow=-100,
                              tpow=-100,
                              itnum=3,
                              sd=0,
                              given_twfs=None,
                              otflag=None):
    """Get PPG-based joint features and iteratively refined alignments

    Joint features are produced by ``get_ppg_alignment`` during the 1st
    iteration only; every iteration additionally runs ``get_alignment`` to
    obtain the time warping functions. After every iteration except the
    last one a GMM is trained on the joint features of the 1st iteration,
    and the original features converted by it are used as ``cvdata`` in
    the following iteration.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Configuration providing the attributes ``GMM_mcep_n_mix``,
        ``GMM_mcep_n_iter``, ``GMM_mcep_covtype`` and ``GMM_mcep_cvtype``
    s_list_file : str
        Input for ``np.loadtxt``; its i-th entry is handed to
        ``get_ppg_alignment`` together with the i-th original file
    tar_list_file : str
        Input for ``np.loadtxt``; its i-th entry is handed to
        ``get_ppg_alignment`` together with the i-th target file
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension passed on to the alignment functions
        Default set to 0
    given_twfs : list, optional, (`num_files`)
        Per-file alignments passed as ``given_twf`` during the 1st
        iteration only
        Default set to None
    otflag : str, optional
        Passed through to the alignment functions
        Default set to None

    Returns
    -------
    jfvs : list,
        Results of ``get_ppg_alignment`` collected in the 1st iteration
        (empty when `itnum` < 1)
    twfs : list,
        List of time warping functions of the last iteration
        (empty when `itnum` < 1)
    """
    # ndmin=1 keeps a list with a single entry indexable; without it
    # np.loadtxt collapses the result to a 0-d array.
    s_list_file = np.loadtxt(s_list_file, dtype='str', ndmin=1)
    tar_list_file = np.loadtxt(tar_list_file, dtype='str', ndmin=1)

    cvgmm = None
    jnt_data = None
    # Bound up front so that ``itnum < 1`` returns empty lists.
    jfvs, twfs = [], []
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs = []
        use_given = it == 1 and given_twfs is not None
        for i, ofeat in enumerate(odata):
            gtwf = given_twfs[i] if use_given else None
            cvdata = None
            if it > 1:
                cvdata = cvgmm.convert(static_delta(ofeat[:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            if it == 1:
                # joint features are only extracted on the first pass
                jdata = get_ppg_alignment(ofeat,
                                          onpows[i],
                                          tdata[i],
                                          tnpows[i],
                                          s_list_file[i],
                                          tar_list_file[i],
                                          opow=opow,
                                          tpow=tpow,
                                          sd=sd,
                                          cvdata=cvdata,
                                          given_twf=gtwf,
                                          otflag=otflag)
                print(s_list_file[i])
                jfvs.append(jdata)
            _, twf, _ = get_alignment(ofeat,
                                      onpows[i],
                                      tdata[i],
                                      tnpows[i],
                                      opow=opow,
                                      tpow=tpow,
                                      sd=sd,
                                      cvdata=cvdata,
                                      given_twf=gtwf,
                                      otflag=otflag)
            twfs.append(twf)

        if it != itnum:
            # train GMM, if not final iteration
            if jnt_data is None:
                # jfvs is complete after the 1st pass and never changes
                # afterwards, so stack it once instead of once per file.
                jnt_data = transform_jnt(jfvs)
            print("training GMM")
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    The alignment is repeated `itnum` times. After every iteration except
    the last one a GMM is trained on the joint features; from the second
    iteration on, the original features converted by that GMM are handed
    to ``get_alignment`` as ``cvdata``.

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Configuration providing the attributes ``GMM_mcep_n_mix``,
        ``GMM_mcep_n_iter``, ``GMM_mcep_covtype`` and ``GMM_mcep_cvtype``
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, optional, (`num_files`)
        Per-file alignments passed to ``get_alignment`` as ``given_twf``
        during the 1st iteration only
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors of the last iteration
        (empty when `itnum` < 1)
    twfs : list,
        List of time warping functions of the last iteration
        (empty when `itnum` < 1)

    """
    # Bound up front so that ``itnum < 1`` returns empty lists instead of
    # raising UnboundLocalError at the final ``return``.
    twfs, jfvs = [], []
    cvgmm = None
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        # alignment
        twfs, jfvs = [], []
        use_given = it == 1 and given_twfs is not None
        for i, ofeat in enumerate(odata):
            gtwf = given_twfs[i] if use_given else None
            cvdata = None
            if it > 1:
                # From the 2nd iteration on, align against the features
                # converted by the GMM trained in the previous iteration.
                cvdata = cvgmm.convert(static_delta(ofeat[:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(ofeat,
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))

        if it != itnum:
            # train GMM, if not final iteration; the stacked joint data is
            # only needed here, so it is not built on the last pass.
            jnt_data = transform_jnt(jfvs)
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
def main(*argv):
    """Estimate and store joint features of an original/target speaker pair.

    Reads ``mcep``, ``npow`` and ``codeap`` features for both list files
    from ``<pair_dir>/h5``, aligns the mceps with ``align_feature_vectors``
    and reuses the resulting time warping functions to align the codeaps.
    The joint features are saved to ``<pair_dir>/jnt/it<N>_jnt.h5`` and one
    time warping function per line of the original list file is saved to
    ``<pair_dir>/twf/it<N>_<basename>.h5``, where ``N`` is
    ``pconf.jnt_n_iter``.

    Parameters
    ----------
    *argv : str
        Command line arguments; ``sys.argv[1:]`` is used when none are given
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('tar_yml', type=str,
                        help='Yml file of the target speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read speaker-dependent yml files
    oconf = SpeakerYML(args.org_yml)
    tconf = SpeakerYML(args.tar_yml)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    # dtw between original and target w/o 0th and silence
    print('## Alignment mcep w/o 0-th and silence ##')
    jmceps, twfs = align_feature_vectors(org_mceps,
                                         org_npows,
                                         tar_mceps,
                                         tar_npows,
                                         pconf,
                                         opow=oconf.power_threshold,
                                         tpow=tconf.power_threshold,
                                         itnum=pconf.jnt_n_iter,
                                         sd=1,
                                         )
    jnt_mcep = transform_jnt(jmceps)

    # create joint feature for codeap using given twfs
    print('## Alignment codeap into target length using given twf ##')
    org_codeaps = read_feats(args.org_list_file, h5_dir, ext='codeap')
    tar_codeaps = read_feats(args.tar_list_file, h5_dir, ext='codeap')
    jcodeaps = []
    for i, org_codeap in enumerate(org_codeaps):
        # extract codeap joint feature vector
        jcodeap, _, _ = get_alignment(org_codeap,
                                      org_npows[i],
                                      tar_codeaps[i],
                                      tar_npows[i],
                                      opow=oconf.power_threshold,
                                      tpow=tconf.power_threshold,
                                      given_twf=twfs[i])
        jcodeaps.append(jcodeap)
    jnt_codeap = transform_jnt(jcodeaps)

    # prefix shared by every output file name
    it_prefix = 'it' + str(pconf.jnt_n_iter)

    # save joint feature vectors
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, it_prefix + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    try:
        jnth5.save(jnt_mcep, ext='mcep')
        jnth5.save(jnt_codeap, ext='codeap')
    finally:
        # close the handle even if one of the saves raises
        jnth5.close()

    # save twfs
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        # one twf per line of the original list file, in file order
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(twf_dir, it_prefix + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            try:
                twfh5.save(twf, ext='twf')
            finally:
                twfh5.close()