Esempio n. 1
0
    def decode_RNN(wav_list, gpu, cvlist=None, cvlist_src=None, \
        mcd_cvlist_src=None, mcdstd_cvlist_src=None, mcdpow_cvlist_src=None, mcdpowstd_cvlist_src=None):
        """Analyze, convert and resynthesize every waveform in ``wav_list`` on one GPU.

        For each file the waveform is analyzed into F0, spectral envelope and
        aperiodicity. The resulting feature matrix is encoded to a latent
        sequence, which is decoded twice: once with the source speaker code
        (reconstruction) and once with the target speaker code (conversion).
        The converted features are written to an HDF5 file in
        ``args.h5outdir`` and two waveforms are written to ``args.outdir``:
        ``<name>_cv.wav`` (converted) and ``<name>_anasyn.wav`` (plain
        analysis-synthesis of the input).

        Besides its arguments the function reads ``args``, ``config``,
        ``n_spk``, ``src_code_idx``, ``trg_code_idx``, ``model_epoch`` and the
        ``f0_range_*`` statistics from the enclosing scope.

        Args:
            wav_list: Iterable of input waveform paths.
            gpu: CUDA device handed to ``torch.cuda.device``.
            cvlist: Receives, per file, the per-dimension variance of the
                converted mcep (coefficient 0 excluded).
            cvlist_src: Same as ``cvlist`` for the source reconstruction.
            mcd_cvlist_src: Receives the mean of ``dtw.calc_mcd`` between the
                analyzed and the reconstructed mcep without coefficient 0.
            mcdstd_cvlist_src: Receives the matching standard deviation.
            mcdpow_cvlist_src: Receives the mean including coefficient 0.
            mcdpowstd_cvlist_src: Receives the matching standard deviation.

        All list arguments are appended to in place. They default to ``None``,
        but real list-like objects are required as soon as ``wav_list`` is
        non-empty.
        """
        with torch.cuda.device(gpu):
            # Output statistics: the first config.stdim entries of the stored
            # vectors are skipped.
            mean_trg = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0")[config.stdim:]).cuda()
            std_trg = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info(config)
            logging.info("model")
            with torch.no_grad():
                model_encoder = GRU_RNN_STOCHASTIC(
                    in_dim=config.in_dim,
                    out_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers,
                    hidden_units=config.hidden_units,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    arparam=config.arparam,
                    spk_dim=n_spk,
                    causal_conv=config.causal_conv,
                    scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + n_spk,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size_dec,
                                        dilation_size=config.dilation_size_dec,
                                        causal_conv=config.causal_conv,
                                        scale_in_flag=False)
                logging.info(model_encoder)
                logging.info(model_decoder)
                # Deserialize the checkpoint once for both networks and drop
                # it right away so no second copy of the weights stays alive
                # while the files are processed.
                checkpoint = torch.load(args.model)
                model_encoder.load_state_dict(checkpoint["model_encoder"])
                model_decoder.load_state_dict(checkpoint["model_decoder"])
                del checkpoint
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                # Zero initial input for the encoder; its width depends on
                # config.arparam.
                if config.arparam:
                    init_pp = np.zeros((1, 1, config.lat_dim * 2 + n_spk))
                else:
                    init_pp = np.zeros((1, 1, config.lat_dim + n_spk))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                # A zero vector expressed in the normalized output domain,
                # shared by both decoder calls.
                y_in_src = y_in_trg = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_trg) / std_trg, 0), 0)
            fft_size = args.fftl
            mcep_dim = model_decoder.out_dim - 1
            # Frames per second derived from the frame shift in milliseconds;
            # constant for the whole run, so computed once.
            lpf_fs = int(1.0 / (args.shiftms * 0.001))
            for wav_file in wav_list:
                # convert mcep
                # Strip only the final extension: str.replace would rewrite
                # every ".wav" in the name and would leave inputs with another
                # extension unchanged, making both synthesized files collide.
                utt_name = os.path.splitext(os.path.basename(wav_file))[0]
                feat_file = os.path.join(args.h5outdir, utt_name + ".h5")
                logging.info("cvmcep " + feat_file + " " + wav_file)

                # The sampling rate used below is the one read from the file.
                fs, x = read_wav(wav_file, cutoff=70)

                time_axis, f0, sp, ap = analyze_range(x, fs=fs, minf0=args.minf0, maxf0=args.maxf0, \
                                                        fperiod=args.shiftms, fftl=args.fftl)
                logging.info(sp.shape)

                mcep = ps.sp2mc(sp, mcep_dim, args.mcep_alpha)
                logging.info(mcep.shape)
                codeap = pw.code_aperiodicity(ap, fs)
                logging.info(codeap.shape)

                # Indices of the frames kept by the power threshold; the
                # distortion below is measured on these frames only.
                npow = spc2npow(sp)
                logging.info(npow.shape)
                _, spcidx = extfrm(mcep, npow, power_threshold=args.pow)
                spcidx = spcidx[0]
                logging.info(spcidx.shape)

                uv, contf0 = convert_continuos_f0(np.array(f0))
                uv = np.expand_dims(uv, axis=-1)
                logging.info(uv.shape)
                cont_f0_lpf = low_pass_filter(contf0, lpf_fs, cutoff=LP_CUTOFF)
                # NOTE(review): np.log yields -inf/nan if the filtered contour
                # contains non-positive samples; confirm low_pass_filter
                # cannot produce them here.
                logcontf0 = np.expand_dims(np.log(cont_f0_lpf), axis=-1)
                logging.info(logcontf0.shape)
                feat = np.c_[uv, logcontf0, codeap, mcep]
                logging.info(feat.shape)

                logging.info("generate")
                with torch.no_grad():
                    lat_feat_src, _, _, _, _ = \
                        model_encoder(torch.FloatTensor(feat).cuda(), y_in_pp, sampling=False)

                    # One-hot speaker codes, one row per latent frame.
                    src_code = np.zeros((lat_feat_src.shape[0], n_spk))
                    src_code[:, src_code_idx] = 1
                    src_code = torch.FloatTensor(src_code).cuda()

                    trg_code = np.zeros((lat_feat_src.shape[0], n_spk))
                    trg_code[:, trg_code_idx] = 1
                    trg_code = torch.FloatTensor(trg_code).cuda()

                    # Source code -> reconstruction of the input spectrum.
                    cvmcep_src, _, _ = model_decoder(
                        torch.cat((src_code, lat_feat_src), 1), y_in_src)
                    cvmcep_src = np.array(cvmcep_src.cpu().data.numpy(),
                                          dtype=np.float64)

                    # Target code -> converted spectrum.
                    cvmcep, _, _ = model_decoder(
                        torch.cat((trg_code, lat_feat_src), 1), y_in_trg)
                    cvmcep = np.array(cvmcep.cpu().data.numpy(),
                                      dtype=np.float64)

                logging.info(lat_feat_src.shape)
                logging.info(cvmcep_src.shape)
                logging.info(cvmcep.shape)

                # Excitation features of the converted utterance: the F0 is
                # converted with the source/target statistics, the coded
                # aperiodicity is reused unchanged.
                cvf0 = convert_f0(f0, f0_range_mean_src, f0_range_std_src,
                                  f0_range_mean_trg, f0_range_std_trg)
                uv_cv, contf0_cv = convert_continuos_f0(np.array(cvf0))
                uv_cv = np.expand_dims(uv_cv, axis=-1)
                logging.info(uv_cv.shape)
                cont_f0_lpf_cv = low_pass_filter(contf0_cv, lpf_fs,
                                                 cutoff=LP_CUTOFF)
                logcontf0_cv = np.expand_dims(np.log(cont_f0_lpf_cv), axis=-1)
                logging.info(logcontf0_cv.shape)
                feat_cv = np.c_[uv_cv, logcontf0_cv, codeap]
                logging.info(feat_cv.shape)

                feat_cvmcep = np.c_[feat_cv, cvmcep]
                logging.info(feat_cvmcep.shape)
                write_path = '/feat_cvmcep_cycvae-' + model_epoch
                logging.info(feat_file + ' ' + write_path)
                write_hdf5(feat_file, write_path, feat_cvmcep)
                cvlist.append(np.var(cvmcep[:, 1:], axis=0))

                # Distortion between the analyzed and the reconstructed mcep,
                # with ("mcdpow") and without ("mcd") coefficient 0.
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),:], dtype=np.float64), \
                                                np.array(cvmcep_src[np.array(spcidx),:], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),1:], dtype=np.float64), \
                                            np.array(cvmcep_src[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_src_cv: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_src_cv: %.6f dB +- %.6f" %
                             (mcd_mean, mcd_std))
                mcdpow_cvlist_src.append(mcdpow_mean)
                mcdpowstd_cvlist_src.append(mcdpow_std)
                mcd_cvlist_src.append(mcd_mean)
                mcdstd_cvlist_src.append(mcd_std)
                cvlist_src.append(np.var(cvmcep_src[:, 1:], axis=0))

                logging.info("synth voco")
                cvsp = ps.mc2sp(np.ascontiguousarray(cvmcep), args.mcep_alpha,
                                fft_size)
                logging.info(cvsp.shape)
                # Clip to [-1, 1] before writing 16-bit PCM.
                wav = np.clip(
                    pw.synthesize(cvf0,
                                  cvsp,
                                  ap,
                                  fs,
                                  frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(args.outdir, utt_name + "_cv.wav")
                sf.write(wavpath, wav, fs, 'PCM_16')
                logging.info(wavpath)

                logging.info("synth anasyn")
                wav = np.clip(
                    pw.synthesize(f0, sp, ap, fs, frame_period=args.shiftms),
                    -1, 1)
                wavpath = os.path.join(args.outdir, utt_name + "_anasyn.wav")
                sf.write(wavpath, wav, fs, 'PCM_16')
                logging.info(wavpath)
Esempio n. 2
0
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None):
        """Cyclically reconstruct the mcep of every feature file on one GPU.

        Each file is encoded and decoded with the ``src_code_idx`` speaker
        code. The result is re-encoded together with converted excitation
        features and decoded with the ``trg_code_idx`` speaker code. The
        output of this second pass is compared with the original mcep and
        written to
        ``<grandparent of feat_file>/<spk_trg>-<spk_src>-<spk_trg>/<basename>``
        under the HDF5 path ``/feat_recmcep_cycvae-<model_epoch>``.

        Besides its arguments the function reads ``args``, ``config``,
        ``n_spk``, ``src_code_idx``, ``trg_code_idx``, ``spk_src``,
        ``spk_trg``, ``model_epoch`` and the ``f0_range_*`` statistics from
        the enclosing scope.

        Args:
            feat_list: Iterable of HDF5 feature file paths.
            gpu: CUDA device handed to ``torch.cuda.device``.
            cvlist, mcdpow_cvlist, mcdpowstd_cvlist, mcd_cvlist, mcdstd_cvlist:
                Lists appended to when the dataset token of the path contains
                ``'tr'``.
            cvlist_dv, mcdpow_cvlist_dv, mcdpowstd_cvlist_dv, mcd_cvlist_dv,
            mcdstd_cvlist_dv: Lists appended to when it contains ``'dv'``.

        Raises:
            ValueError: If a filtered F0 contour has no positive sample, so
                its logarithm cannot be taken.
        """

        def _fill_nonpositive(contour):
            """Overwrite non-positive samples of ``contour`` in place.

            A sample with positive neighbours on both sides becomes their
            mean; at an edge, or when one side has no positive sample, the
            value from the remaining side is copied. Samples are processed
            left to right, so already repaired samples act as left neighbours.
            """
            length = len(contour)
            for i in range(length):
                if contour[i] <= 0:
                    # Reset per sample so a neighbour found for an earlier
                    # sample can never leak into this one.
                    left_val = right_val = None
                    for j in range(i - 1, -1, -1):
                        if contour[j] > 0:
                            left_val = contour[j]
                            break
                    for j in range(i + 1, length):
                        if contour[j] > 0:
                            right_val = contour[j]
                            break
                    if left_val is not None and right_val is not None:
                        contour[i] = (left_val + right_val) / 2
                    elif left_val is not None:
                        contour[i] = left_val
                    elif right_val is not None:
                        contour[i] = right_val
                    else:
                        raise ValueError(
                            "F0 contour contains no positive sample")
            return contour

        with torch.cuda.device(gpu):
            # Output statistics: the first config.stdim entries of the stored
            # vectors are skipped.
            mean_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0")[config.stdim:]).cuda()
            std_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info("model")
            logging.info(config)
            with torch.no_grad():
                model_encoder = GRU_RNN_STOCHASTIC(
                    in_dim=config.in_dim,
                    out_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers,
                    hidden_units=config.hidden_units,
                    kernel_size=config.kernel_size,
                    dilation_size=config.dilation_size,
                    spk_dim=n_spk,
                    scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + n_spk,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size,
                                        dilation_size=config.dilation_size,
                                        scale_in_flag=False)
                logging.info(model_encoder)
                logging.info(model_decoder)
                # Deserialize the checkpoint once for both networks and drop
                # it right away so no second copy of the weights stays alive
                # while the files are processed.
                checkpoint = torch.load(args.model)
                model_encoder.load_state_dict(checkpoint["model_encoder"])
                model_decoder.load_state_dict(checkpoint["model_decoder"])
                del checkpoint
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                init_pp = np.zeros((1, 1, config.lat_dim * 2 + n_spk))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                # A zero vector expressed in the normalized output domain,
                # shared by both decoder calls.
                y_in_src = y_in_trg = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_jnt) / std_jnt, 0), 0)
            # Frames per second derived from the frame shift in milliseconds.
            lpf_fs = int(1.0 / (args.shiftms * 0.001))
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat = read_hdf5(feat_file, "/feat_org_lf0")
                logging.info(feat.shape)
                f0 = read_hdf5(feat_file, "/f0_range")
                # The F0 statistics are passed in trg -> src order here.
                cvf0 = convert_f0(f0, f0_range_mean_trg, f0_range_std_trg,
                                  f0_range_mean_src, f0_range_std_src)
                cvuv, cont_f0 = convert_continuos_f0(cvf0)
                cvuv = np.expand_dims(cvuv, axis=-1)
                cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs,
                                              cutoff=LP_CUTOFF)
                # The logarithm below needs strictly positive values.
                if np.min(cont_f0_lpf) <= 0:
                    _fill_nonpositive(cont_f0_lpf)
                cvlogf0fil = np.expand_dims(np.log(cont_f0_lpf), axis=-1)
                # Converted voicing and log-F0 plus the remaining original
                # excitation columns (2 .. config.stdim).
                feat_cv = np.c_[cvuv, cvlogf0fil, feat[:, 2:config.stdim]]
                with torch.no_grad():
                    # First pass: encode the original features and decode
                    # with the src speaker code.
                    lat_feat, _, _, _, _ = model_encoder(torch.FloatTensor(feat).cuda(), \
                                                        y_in_pp, sampling=False)
                    src_code = np.zeros((lat_feat.shape[0], n_spk))
                    src_code[:, src_code_idx] = 1
                    src_code = torch.FloatTensor(src_code).cuda()
                    cvmcep, _, _ = model_decoder(
                        torch.cat((src_code, lat_feat), 1), y_in_src)
                    # Second pass: re-encode the converted features and
                    # decode with the trg speaker code.
                    lat_feat, _, _, _, _ = model_encoder(torch.cat((torch.FloatTensor(feat_cv).cuda(), cvmcep),1), \
                                                        y_in_pp, sampling=False)
                    trg_code = np.zeros((lat_feat.shape[0], n_spk))
                    trg_code[:, trg_code_idx] = 1
                    trg_code = torch.FloatTensor(trg_code).cuda()
                    cvmcep, _, _ = model_decoder(
                        torch.cat((trg_code, lat_feat), 1), y_in_trg)

                cvmcep = np.array(cvmcep.cpu().data.numpy(), dtype=np.float64)
                logging.info(cvmcep.shape)

                # Distortion between the original and the cyclically
                # reconstructed mcep on the stored frame indices, with
                # ("mcdpow") and without ("mcd") coefficient 0.
                mcep = feat[:, config.stdim:]
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),:], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),1:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                # Dataset token: second '/'-separated path component up to
                # its first '_'.
                # NOTE(review): depends on the layout of the paths in
                # feat_list; confirm against the caller.
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                # Original excitation columns + reconstructed mcep.
                feat_cvmcep = np.c_[feat[:, :config.stdim], cvmcep]
                logging.info(feat_cvmcep.shape)
                write_path = '/feat_recmcep_cycvae-' + model_epoch
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    spk_trg + "-" + spk_src + "-" + spk_trg)
                try:
                    os.makedirs(outh5dir)
                except OSError:
                    # The directory may already exist, possibly created by
                    # another worker in the meantime; only fail when the
                    # path really is not a directory.
                    if not os.path.isdir(outh5dir):
                        raise
                feat_file_cyc = os.path.join(outh5dir,
                                             os.path.basename(feat_file))
                logging.info(feat_file_cyc + ' ' + write_path)
                write_hdf5(feat_file_cyc, write_path, feat_cvmcep)
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None):
        """Reconstruct the mcep of every feature file with one speaker code.

        Each file is encoded, decoded with the ``spk_code_idx`` speaker code
        and compared with its original mcep. The reconstruction is then
        passed through ``mod_pow`` and written to
        ``<grandparent of feat_file>/<spk>-<spk>/<basename>`` under the HDF5
        path ``/feat_recmcep_cycvae-<model_epoch>``.

        Besides its arguments the function reads ``args``, ``config``,
        ``n_spk``, ``spk_code_idx``, ``spk``, ``model_epoch`` and ``IRLEN``
        from the enclosing scope.

        Args:
            feat_list: Iterable of HDF5 feature file paths.
            gpu: CUDA device handed to ``torch.cuda.device``.
            cvlist, mcdpow_cvlist, mcdpowstd_cvlist, mcd_cvlist, mcdstd_cvlist:
                Lists appended to when the dataset token of the path contains
                ``'tr'``.
            cvlist_dv, mcdpow_cvlist_dv, mcdpowstd_cvlist_dv, mcd_cvlist_dv,
            mcdstd_cvlist_dv: Lists appended to when it contains ``'dv'``.
        """
        with torch.cuda.device(gpu):
            # Output statistics: the first config.stdim entries of the stored
            # vectors are skipped.
            mean_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0")[config.stdim:]).cuda()
            std_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info("model")
            logging.info(config)
            with torch.no_grad():
                model_encoder = GRU_RNN_STOCHASTIC(
                    in_dim=config.in_dim,
                    out_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers,
                    hidden_units=config.hidden_units,
                    kernel_size=config.kernel_size,
                    dilation_size=config.dilation_size,
                    spk_dim=n_spk,
                    scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + n_spk,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size,
                                        dilation_size=config.dilation_size,
                                        scale_in_flag=False)
                logging.info(model_encoder)
                logging.info(model_decoder)
                # Deserialize the checkpoint once for both networks and drop
                # it right away so no second copy of the weights stays alive
                # while the files are processed.
                checkpoint = torch.load(args.model)
                model_encoder.load_state_dict(checkpoint["model_encoder"])
                model_decoder.load_state_dict(checkpoint["model_decoder"])
                del checkpoint
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                init_pp = np.zeros((1, 1, config.lat_dim * 2 + n_spk))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                # A zero vector expressed in the normalized output domain.
                y_in_src = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_jnt) / std_jnt, 0), 0)
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat = read_hdf5(feat_file, "/feat_org_lf0")
                logging.info(feat.shape)
                with torch.no_grad():
                    lat_feat, _, _, _, _ = model_encoder(torch.FloatTensor(feat).cuda(), \
                                                        y_in_pp, sampling=False)
                    # One-hot speaker code, one row per latent frame.
                    spk_code = np.zeros((lat_feat.shape[0], n_spk))
                    spk_code[:, spk_code_idx] = 1
                    spk_code = torch.FloatTensor(spk_code).cuda()
                    cvmcep, _, _ = model_decoder(
                        torch.cat((spk_code, lat_feat), 1), y_in_src)

                cvmcep = np.array(cvmcep.cpu().data.numpy(), dtype=np.float64)
                logging.info(cvmcep.shape)

                # Distortion between the original and the reconstructed mcep
                # on the stored frame indices, with ("mcdpow") and without
                # ("mcd") coefficient 0. Measured before mod_pow is applied.
                mcep = feat[:, config.stdim:]
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),:], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),1:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                # Dataset token: second '/'-separated path component up to
                # its first '_'.
                # NOTE(review): depends on the layout of the paths in
                # feat_list; confirm against the caller.
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                logging.info("mod_pow")
                # NOTE(review): presumably adjusts the power of the
                # reconstruction towards that of the original mcep; confirm
                # against mod_pow's definition.
                cvmcep = mod_pow(cvmcep,
                                 mcep,
                                 alpha=args.mcep_alpha,
                                 irlen=IRLEN)
                logging.info(cvmcep.shape)
                # Original excitation columns + reconstructed mcep.
                feat_cvmcep = np.c_[feat[:, :config.stdim], cvmcep]
                logging.info(feat_cvmcep.shape)
                write_path = '/feat_recmcep_cycvae-' + model_epoch
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    spk + "-" + spk)
                try:
                    os.makedirs(outh5dir)
                except OSError:
                    # The directory may already exist, possibly created by
                    # another worker in the meantime; only fail when the
                    # path really is not a directory.
                    if not os.path.isdir(outh5dir):
                        raise
                # Separate name so the loop variable keeps the input path.
                out_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(out_file + ' ' + write_path)
                write_hdf5(out_file, write_path, feat_cvmcep)
Esempio n. 4
0
    def gpu_decode(feat_list,
                   feat_trg_list,
                   gpu,
                   cvlist=None,
                   mcdlist=None,
                   mcdstdlist=None,
                   mcdpowlist=None,
                   mcdpowstdlist=None,
                   cvlist_src=None,
                   mcdlist_src=None,
                   mcdstdlist_src=None,
                   mcdpowlist_src=None,
                   mcdpowstdlist_src=None,
                   cvlist_trg=None,
                   mcdlist_trg=None,
                   mcdstdlist_trg=None,
                   mcdpowlist_trg=None,
                   mcdpowstdlist_trg=None,
                   lat_dist_rmse_enc_list=None,
                   lat_dist_cosim_enc_list=None,
                   lat_dist_rmse_pri_list=None,
                   lat_dist_cosim_pri_list=None):
        with torch.cuda.device(gpu):
            mean_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0_jnt")[config.stdim:]).cuda()
            std_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0_jnt")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info("model")
            logging.info(config)
            with torch.no_grad():
                model_encoder = GRU_RNN(in_dim=config.in_dim,
                                        out_dim=config.lat_dim * 2,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size,
                                        dilation_size=config.dilation_size,
                                        scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + 2,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size,
                                        dilation_size=config.dilation_size,
                                        scale_in_flag=False)
                model_encoder.load_state_dict(
                    torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(
                    torch.load(args.model)["model_decoder"])
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                logging.info(model_encoder)
                logging.info(model_decoder)
                init_pp = np.zeros((1, 1, config.lat_dim * 2))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                y_in_src = y_in_trg = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_jnt) / std_jnt, 0), 0)
            for feat_file, feat_trg_file in zip(feat_list, feat_trg_list):
                # convert mcep
                logging.info("cvmcep " + feat_file + " " + feat_trg_file)

                feat = read_hdf5(feat_file, "/feat_org_lf0")
                feat_trg = read_hdf5(feat_trg_file, "/feat_org_lf0")
                logging.info(feat.shape)
                logging.info(feat_trg.shape)
                with torch.no_grad():
                    lat_src, _, _ = model_encoder(
                        torch.FloatTensor(feat).cuda(),
                        y_in_pp,
                        clamp_vae=True,
                        lat_dim=config.lat_dim)
                    lat_feat = sampling_vae_batch(lat_src.unsqueeze(0).repeat(
                        args.n_smpl_dec, 1, 1),
                                                  lat_dim=config.lat_dim)
                    lat_feat = torch.mean(lat_feat, 0)
                    lat_trg, _, _ = model_encoder(
                        torch.FloatTensor(feat_trg).cuda(),
                        y_in_pp,
                        clamp_vae=True,
                        lat_dim=config.lat_dim)
                    lat_feat_trg = sampling_vae_batch(
                        lat_trg.unsqueeze(0).repeat(args.n_smpl_dec, 1, 1),
                        lat_dim=config.lat_dim)
                    lat_feat_trg = torch.mean(lat_feat_trg, 0)
                    src_code = np.zeros((lat_feat.shape[0], 2))
                    trg_code = np.zeros((lat_feat.shape[0], 2))
                    trg_trg_code = np.zeros((lat_feat_trg.shape[0], 2))
                    src_code[:, 0] = 1
                    trg_code[:, 1] = 1
                    trg_trg_code[:, 1] = 1
                    src_code = torch.FloatTensor(src_code).cuda()
                    trg_code = torch.FloatTensor(trg_code).cuda()
                    trg_trg_code = torch.FloatTensor(trg_trg_code).cuda()
                    cvmcep, _, _ = model_decoder(
                        torch.cat((trg_code, lat_feat), 1), y_in_trg)
                    cvmcep = np.array(cvmcep.cpu().data.numpy(),
                                      dtype=np.float64)
                    cvmcep_src, _, _ = model_decoder(
                        torch.cat((src_code, lat_feat), 1), y_in_src)
                    cvmcep_src = np.array(cvmcep_src.cpu().data.numpy(),
                                          dtype=np.float64)
                    cvmcep_trg, _, _ = model_decoder(
                        torch.cat((trg_trg_code, lat_feat_trg), 1), y_in_trg)
                    cvmcep_trg = np.array(cvmcep_trg.cpu().data.numpy(),
                                          dtype=np.float64)

                logging.info(cvmcep.shape)
                logging.info(cvmcep_trg.shape)
                cvlist.append(np.var(cvmcep[:, 1:], axis=0))
                cvlist_src.append(np.var(cvmcep_src[:, 1:], axis=0))
                cvlist_trg.append(np.var(cvmcep_trg[:, 1:], axis=0))
                logging.info(len(cvlist))

                spcidx_src = read_hdf5(feat_file, "/spcidx_range")[0]
                mcep_trg = read_hdf5(feat_trg_file, "/mcepspc_range")
                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(
                    np.array(cvmcep[np.array(spcidx_src), :],
                             dtype=np.float64),
                    np.array(mcep_trg[:, :], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(
                    np.array(cvmcep[np.array(spcidx_src), 1:],
                             dtype=np.float64),
                    np.array(mcep_trg[:, 1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                mcdpowlist.append(mcdpow_mean)
                mcdpowstdlist.append(mcdpow_std)
                mcdlist.append(mcd_mean)
                mcdstdlist.append(mcd_std)

                mcep_src = read_hdf5(feat_file, "/mcepspc_range")
                _, mcdpow_arr = dtw.calc_mcd(
                    np.array(mcep_src[:, :], dtype=np.float64),
                    np.array(cvmcep_src[np.array(spcidx_src), :],
                             dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(
                    np.array(mcep_src[:, 1:], dtype=np.float64),
                    np.array(cvmcep_src[np.array(spcidx_src), 1:],
                             dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_src: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_src: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                mcdpowlist_src.append(mcdpow_mean)
                mcdpowstdlist_src.append(mcdpow_std)
                mcdlist_src.append(mcd_mean)
                mcdstdlist_src.append(mcd_std)

                spcidx_trg = read_hdf5(feat_trg_file, "/spcidx_range")[0]
                _, mcdpow_arr = dtw.calc_mcd(
                    np.array(mcep_trg[:, :], dtype=np.float64),
                    np.array(cvmcep_trg[np.array(spcidx_trg), :],
                             dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(
                    np.array(mcep_trg[:, 1:], dtype=np.float64),
                    np.array(cvmcep_trg[np.array(spcidx_trg), 1:],
                             dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_trg: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_trg: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                mcdpowlist_trg.append(mcdpow_mean)
                mcdpowstdlist_trg.append(mcdpow_std)
                mcdlist_trg.append(mcd_mean)
                mcdstdlist_trg.append(mcd_std)

                with torch.no_grad():
                    spcidx_src = torch.LongTensor(spcidx_src).cuda()
                    spcidx_trg = torch.LongTensor(spcidx_trg).cuda()

                trj_lat_src = np.array(torch.index_select(
                    lat_src, 0, spcidx_src).cpu().data.numpy(),
                                       dtype=np.float64)
                trj_lat_trg = np.array(torch.index_select(
                    lat_trg, 0, spcidx_trg).cpu().data.numpy(),
                                       dtype=np.float64)
                aligned_lat_srctrg, _, _, _ = dtw.dtw_org_to_trg(
                    trj_lat_src, trj_lat_trg)
                lat_dist_srctrg = np.mean(
                    np.sqrt(
                        np.mean((aligned_lat_srctrg - trj_lat_trg)**2,
                                axis=0)))
                _, _, lat_cdist_srctrg, _ = dtw.dtw_org_to_trg(trj_lat_trg,
                                                               trj_lat_src,
                                                               mcd=0)
                aligned_lat_trgsrc, _, _, _ = dtw.dtw_org_to_trg(
                    trj_lat_trg, trj_lat_src)
                lat_dist_trgsrc = np.mean(
                    np.sqrt(
                        np.mean((aligned_lat_trgsrc - trj_lat_src)**2,
                                axis=0)))
                _, _, lat_cdist_trgsrc, _ = dtw.dtw_org_to_trg(trj_lat_src,
                                                               trj_lat_trg,
                                                               mcd=0)
                logging.info("%lf %lf %lf %lf" %
                             (lat_dist_srctrg, lat_cdist_srctrg,
                              lat_dist_trgsrc, lat_cdist_trgsrc))
                lat_dist_rmse = (lat_dist_srctrg + lat_dist_trgsrc) / 2
                lat_dist_cosim = (lat_cdist_srctrg + lat_cdist_trgsrc) / 2
                lat_dist_rmse_enc_list.append(lat_dist_rmse)
                lat_dist_cosim_enc_list.append(lat_dist_cosim)
                logging.info("lat_dist_enc: %.6f %.6f" %
                             (lat_dist_rmse, lat_dist_cosim))

                trj_lat_src = np.array(torch.index_select(
                    lat_feat, 0, spcidx_src).cpu().data.numpy(),
                                       dtype=np.float64)
                trj_lat_trg = np.array(torch.index_select(
                    lat_feat_trg, 0, spcidx_trg).cpu().data.numpy(),
                                       dtype=np.float64)
                aligned_lat_srctrg, _, _, _ = dtw.dtw_org_to_trg(
                    trj_lat_src, trj_lat_trg)
                lat_dist_srctrg = np.mean(
                    np.sqrt(
                        np.mean((aligned_lat_srctrg - trj_lat_trg)**2,
                                axis=0)))
                _, _, lat_cdist_srctrg, _ = dtw.dtw_org_to_trg(trj_lat_trg,
                                                               trj_lat_src,
                                                               mcd=0)
                aligned_lat_trgsrc, _, _, _ = dtw.dtw_org_to_trg(
                    trj_lat_trg, trj_lat_src)
                lat_dist_trgsrc = np.mean(
                    np.sqrt(
                        np.mean((aligned_lat_trgsrc - trj_lat_src)**2,
                                axis=0)))
                _, _, lat_cdist_trgsrc, _ = dtw.dtw_org_to_trg(trj_lat_src,
                                                               trj_lat_trg,
                                                               mcd=0)
                logging.info("%lf %lf %lf %lf" %
                             (lat_dist_srctrg, lat_cdist_srctrg,
                              lat_dist_trgsrc, lat_cdist_trgsrc))
                lat_dist_rmse = (lat_dist_srctrg + lat_dist_trgsrc) / 2
                lat_dist_cosim = (lat_cdist_srctrg + lat_cdist_trgsrc) / 2
                lat_dist_rmse_pri_list.append(lat_dist_rmse)
                lat_dist_cosim_pri_list.append(lat_dist_cosim)
                logging.info("lat_dist_pri: %.6f %.6f" %
                             (lat_dist_rmse, lat_dist_cosim))