Exemple #1
0
    def gpu_decode(feat_list,
                   gpu,
                   cvlist=None,
                   lsd_cvlist=None,
                   lsdstd_cvlist=None,
                   cvlist_dv=None,
                   lsd_cvlist_dv=None,
                   lsdstd_cvlist_dv=None,
                   f0rmse_cvlist=None,
                   f0corr_cvlist=None,
                   caprmse_cvlist=None,
                   f0rmse_cvlist_dv=None,
                   f0corr_cvlist_dv=None,
                   caprmse_cvlist_dv=None,
                   cvlist_cyc=None,
                   lsd_cvlist_cyc=None,
                   lsdstd_cvlist_cyc=None,
                   cvlist_cyc_dv=None,
                   lsd_cvlist_cyc_dv=None,
                   lsdstd_cvlist_cyc_dv=None,
                   f0rmse_cvlist_cyc=None,
                   f0corr_cvlist_cyc=None,
                   caprmse_cvlist_cyc=None,
                   f0rmse_cvlist_cyc_dv=None,
                   f0corr_cvlist_cyc_dv=None,
                   caprmse_cvlist_cyc_dv=None):
        """Reconstruct and cyclically reconstruct mel-spectrograms on one GPU.

        For every file in ``feat_list`` the "/log_1pmelmagsp" features are
        encoded, decoded back with the same speaker index (``spk_idx``) to get
        the reconstruction ("rec"), and that reconstruction is encoded and
        decoded once more to get the cyclic reconstruction ("cyc").  F0
        RMSE / correlation, code-aperiodicity RMSE and log-spectral distortion
        against the original features are logged and appended to the
        accumulator lists, and both outputs are written to HDF5 under
        ``args.string_path``.

        Relies on names from the enclosing scope: ``config``, ``args``,
        ``n_spk``, ``spk_idx``, ``read_hdf5``, ``write_hdf5`` and the model
        classes.

        Args:
            feat_list: Iterable of HDF5 feature file paths.  The second
                "/"-separated path component (up to its first "_") selects
                the accumulator set: containing "tr" -> training lists,
                containing "dv" -> ``*_dv`` lists, otherwise nothing is
                accumulated.
            gpu: CUDA device on which the models are placed and run.
            cvlist, cvlist_dv, cvlist_cyc, cvlist_cyc_dv: Receive the
                per-bin variance of the restored rec / cyc mel-spectrogram.
            lsd_cvlist*, lsdstd_cvlist*: Receive the per-utterance mean and
                standard deviation of the log-spectral distortion.
            f0rmse_cvlist*, f0corr_cvlist*, caprmse_cvlist*: Receive the F0
                RMSE, F0 correlation and per-dimension code-aperiodicity RMSE.

        All accumulator arguments must support ``append`` whenever a file of
        the matching dataset is processed; the ``None`` defaults are only
        placeholders (presumably shared lists are passed by the caller --
        TODO confirm).
        """

        def _trim(x, left, right):
            """Drop ``left`` leading and ``right`` trailing frames on dim 1."""
            # right == 0 needs an open-ended slice: x[:, left:-0] is empty.
            if right > 0:
                return x[:, left:-right]
            return x[:, left:]

        def _log_spkpost(label, logits, left, right):
            """Log the time-averaged speaker posterior of the valid frames."""
            logging.info(label)
            logging.info(
                torch.mean(F.softmax(_trim(logits, left, right), dim=-1), 1))

        def _f0_codeap(lf0cap):
            """Turn decoder excitation output into F0 and code-aperiodicity.

            Column 0 / 2 are rounded and used as multiplicative flags for the
            exponentiated column 1 / columns 3+ (presumably voicing flags and
            log-domain values -- TODO confirm against feature extraction).
            """
            f0_track = np.array(np.rint(lf0cap[:, 0]) * np.exp(lf0cap[:, 1]))
            cap_track = np.array(
                np.rint(lf0cap[:, 2:3]) * (-np.exp(lf0cap[:, 3:])))
            return f0_track, cap_track

        def _f0_rmse_corr(est, ref):
            """Return (RMSE, Pearson correlation) between two F0 tracks."""
            rmse = np.sqrt(np.mean((est - ref)**2))
            est_dev = est - np.mean(est)
            ref_dev = ref - np.mean(ref)
            corr = np.sum(est_dev * ref_dev) / (
                np.sqrt(np.sum(est_dev**2)) * np.sqrt(np.sum(ref_dev**2)))
            return rmse, corr

        def _lsd_stats(est, ref, frames):
            """Return (mean, std) of the per-frame log-spectral distortion."""
            # Clip before log10 so exact zeros do not produce -inf.
            diff_db = 20 * (
                np.log10(np.clip(est[frames], a_min=1e-16, a_max=None)) -
                np.log10(np.clip(ref[frames], a_min=1e-16, a_max=None)))
            lsd_arr = np.sqrt(np.mean(diff_db**2, axis=-1))
            return np.mean(lsd_arr), np.std(lsd_arr)

        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder_melsp = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_melsp)
                model_decoder_melsp = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim + config.lat_dim_e,
                    excit_dim=config.excit_dim,
                    out_dim=config.mel_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_dec)
                logging.info(model_decoder_melsp)
                model_encoder_excit = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim_e,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_excit)
                model_decoder_excit = GRU_EXCIT_DECODER(
                    feat_dim=config.lat_dim_e,
                    cap_dim=config.cap_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_lf0,
                    hidden_units=config.hidden_units_lf0,
                    kernel_size=config.kernel_size_lf0,
                    dilation_size=config.dilation_size_lf0,
                    causal_conv=config.causal_conv_lf0,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_lf0)
                logging.info(model_decoder_excit)

                # (checkpoint key, module) pairs; the key doubles as the name
                # under which the state dict is stored in the checkpoint.
                named_models = [
                    ("model_encoder_melsp", model_encoder_melsp),
                    ("model_decoder_melsp", model_decoder_melsp),
                    ("model_encoder_excit", model_encoder_excit),
                    ("model_decoder_excit", model_decoder_excit),
                ]
                if config.spkidtr_dim > 0:
                    model_spkidtr = SPKID_TRANSFORM_LAYER(
                        n_spk=n_spk, spkidtr_dim=config.spkidtr_dim)
                    logging.info(model_spkidtr)
                    named_models.append(("model_spkidtr", model_spkidtr))

                # Deserialize the checkpoint once (not once per sub-model) and
                # onto the CPU, so no stray copy lands on whatever GPU it was
                # saved from; the modules are moved to this GPU right after.
                checkpoint = torch.load(args.model, map_location="cpu")
                for state_key, model in named_models:
                    model.load_state_dict(checkpoint[state_key])
                    model.cuda()
                    model.eval()
                    for param in model.parameters():
                        param.requires_grad = False
                del checkpoint

            def _speaker_code(n_frames):
                """Build the per-frame code of ``spk_idx`` for ``n_frames``."""
                code = (torch.ones((1, n_frames)) * spk_idx).cuda().long()
                if config.spkidtr_dim > 0:
                    return model_spkidtr(code)
                return code

            def _decode(lat, lat_e):
                """Decode excitation, then mel-spectrogram, from the latents.

                Returns ``(cvlf0, cvmelsp)``, both still carrying whatever
                context padding the decoders left in place.
                """
                cvlf0, _ = model_decoder_excit(
                    _speaker_code(lat_e.shape[1]), lat_e)
                # Cut the latents by the excitation decoder's own padding so
                # their length matches cvlf0, which conditions the melsp
                # decoder.
                lat_cat = torch.cat(
                    (_trim(lat_e, model_decoder_excit.pad_left,
                           model_decoder_excit.pad_right),
                     _trim(lat, model_decoder_excit.pad_left,
                           model_decoder_excit.pad_right)), 2)
                cvmelsp, _ = model_decoder_melsp(
                    lat_cat,
                    y=_speaker_code(lat_cat.shape[1]),
                    e=cvlf0[:, :, :config.excit_dim])
                return cvlf0, cvmelsp

            # The input is padded with enough context for two full
            # encoder -> excit decoder -> melsp decoder passes.
            pad_left = (model_encoder_melsp.pad_left +
                        model_decoder_excit.pad_left +
                        model_decoder_melsp.pad_left) * 2
            pad_right = (model_encoder_melsp.pad_right +
                         model_decoder_excit.pad_right +
                         model_decoder_melsp.pad_right) * 2
            # Padding still present after each stage:
            #   0: 1st encoders      1: 1st excit decoder
            #   2: 1st melsp decoder 3: 2nd encoders
            #   4: 2nd excit decoder
            # What remains after stage 4 equals the melsp decoder's own
            # padding, so the cyclic melsp output is used without trimming.
            outpad_lefts = []
            outpad_rights = []
            left, right = pad_left, pad_right
            for stage in (model_encoder_melsp, model_decoder_excit,
                          model_decoder_melsp, model_encoder_melsp,
                          model_decoder_excit):
                left = left - stage.pad_left
                right = right - stage.pad_right
                outpad_lefts.append(left)
                outpad_rights.append(right)

            for feat_file in feat_list:
                # reconst. melsp
                logging.info("recmelsp " + feat_file)

                feat_org = read_hdf5(feat_file, "/log_1pmelmagsp")
                logging.info(feat_org.shape)

                with torch.no_grad():
                    # F.pad pads the last dim, hence the transpose round trip
                    # to replicate-pad along the frame axis.
                    feat = F.pad(
                        torch.FloatTensor(feat_org).cuda().unsqueeze(
                            0).transpose(1, 2), (pad_left, pad_right),
                        "replicate").transpose(1, 2)

                    # 1st pass: original -> reconstruction
                    spk_logits, _, lat_src, _ = model_encoder_melsp(
                        feat, sampling=False)
                    spk_logits_e, _, lat_src_e, _ = model_encoder_excit(
                        feat, sampling=False)
                    _log_spkpost('input spkpost', spk_logits,
                                 outpad_lefts[0], outpad_rights[0])
                    _log_spkpost('input spkpost_e', spk_logits_e,
                                 outpad_lefts[0], outpad_rights[0])
                    cvlf0_src, cvmelsp_src = _decode(lat_src, lat_src_e)

                    # 2nd pass: reconstruction -> cyclic reconstruction.  The
                    # still-padded reconstruction is fed back so the second
                    # pass has its context frames.
                    spk_logits, _, lat_rec, _ = model_encoder_melsp(
                        cvmelsp_src, sampling=False)
                    spk_logits_e, _, lat_rec_e, _ = model_encoder_excit(
                        cvmelsp_src, sampling=False)
                    _log_spkpost('rec spkpost', spk_logits,
                                 outpad_lefts[3], outpad_rights[3])
                    _log_spkpost('rec spkpost_e', spk_logits_e,
                                 outpad_lefts[3], outpad_rights[3])
                    cvlf0_cyc, cvmelsp_cyc = _decode(lat_rec, lat_rec_e)

                    # Strip the leftover context from the outputs.
                    cvlf0_src = _trim(cvlf0_src, outpad_lefts[1],
                                      outpad_rights[1])
                    cvmelsp_src = _trim(cvmelsp_src, outpad_lefts[2],
                                        outpad_rights[2])
                    cvlf0_cyc = _trim(cvlf0_cyc, outpad_lefts[4],
                                      outpad_rights[4])

                    # float32 copies are what gets written to disk; float64
                    # copies are used for the metrics below.
                    feat_rec = cvmelsp_src[0].detach().cpu().numpy()
                    feat_cyc = cvmelsp_cyc[0].detach().cpu().numpy()

                    cvmelsp_src = np.array(feat_rec, dtype=np.float64)
                    cvlf0_src = np.array(
                        cvlf0_src[0].detach().cpu().numpy(),
                        dtype=np.float64)

                    cvmelsp_cyc = np.array(feat_cyc, dtype=np.float64)
                    cvlf0_cyc = np.array(
                        cvlf0_cyc[0].detach().cpu().numpy(),
                        dtype=np.float64)

                logging.info(cvlf0_src.shape)
                logging.info(cvmelsp_src.shape)

                logging.info(cvlf0_cyc.shape)
                logging.info(cvmelsp_cyc.shape)

                melsp = np.array(feat_org)

                # Reference excitation features of the original utterance.
                feat_world = read_hdf5(feat_file, "/feat_mceplf0cap")
                f0 = np.array(
                    np.rint(feat_world[:, 0]) * np.exp(feat_world[:, 1]))
                codeap = np.array(
                    np.rint(feat_world[:, 2:3]) *
                    (-np.exp(feat_world[:, 3:config.full_excit_dim])))

                # excitation metrics of the reconstruction
                cvf0_src, cvcodeap_src = _f0_codeap(cvlf0_src)
                f0_rmse, f0_corr = _f0_rmse_corr(cvf0_src, f0)
                logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse))
                logging.info('F0_corr_rec: %lf' % (f0_corr))

                codeap_rmse = np.sqrt(
                    np.mean((cvcodeap_src - codeap)**2, axis=0))
                for i, rmse in enumerate(codeap_rmse, 1):
                    logging.info('codeap-%d_rmse_rec: %lf dB' % (i, rmse))

                # excitation metrics of the cyclic reconstruction
                cvf0_cyc, cvcodeap_cyc = _f0_codeap(cvlf0_cyc)
                f0_rmse_cyc, f0_corr_cyc = _f0_rmse_corr(cvf0_cyc, f0)
                logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc))
                logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc))

                codeap_rmse_cyc = np.sqrt(
                    np.mean((cvcodeap_cyc - codeap)**2, axis=0))
                for i, rmse in enumerate(codeap_rmse_cyc, 1):
                    logging.info('codeap-%d_rmse_cyc: %lf dB' % (i, rmse))

                # Frame indices over which spectral distortion is evaluated.
                spcidx = np.array(read_hdf5(feat_file, "/spcidx_range")[0])

                # Invert the log(1 + 10000 * x) compression.
                melsp_rest = (np.exp(melsp) - 1) / 10000
                melsp_src_rest = (np.exp(cvmelsp_src) - 1) / 10000
                melsp_cyc_rest = (np.exp(cvmelsp_cyc) - 1) / 10000

                lsd_mean, lsd_std = _lsd_stats(melsp_src_rest, melsp_rest,
                                               spcidx)
                logging.info("lsd_rec: %.6f dB +- %.6f" % (lsd_mean, lsd_std))

                lsd_mean_cyc, lsd_std_cyc = _lsd_stats(melsp_cyc_rest,
                                                       melsp_rest, spcidx)
                logging.info("lsd_cyc: %.6f dB +- %.6f" %
                             (lsd_mean_cyc, lsd_std_cyc))

                logging.info('org f0')
                logging.info(f0[10:15])
                logging.info('rec f0')
                logging.info(cvf0_src[10:15])
                logging.info('cyc f0')
                logging.info(cvf0_cyc[10:15])
                logging.info('org cap')
                logging.info(codeap[10:15])
                logging.info('rec cap')
                logging.info(cvcodeap_src[10:15])
                logging.info('cyc cap')
                logging.info(cvcodeap_cyc[10:15])

                # Route the statistics to the training or development lists.
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    f0rmse_cvlist.append(f0_rmse)
                    f0corr_cvlist.append(f0_corr)
                    caprmse_cvlist.append(codeap_rmse)
                    lsd_cvlist.append(lsd_mean)
                    lsdstd_cvlist.append(lsd_std)
                    cvlist.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist))
                    f0rmse_cvlist_cyc.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc.append(f0_corr_cyc)
                    caprmse_cvlist_cyc.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc.append(lsd_std_cyc)
                    cvlist_cyc.append(np.var(melsp_cyc_rest, axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    f0rmse_cvlist_dv.append(f0_rmse)
                    f0corr_cvlist_dv.append(f0_corr)
                    caprmse_cvlist_dv.append(codeap_rmse)
                    lsd_cvlist_dv.append(lsd_mean)
                    lsdstd_cvlist_dv.append(lsd_std)
                    cvlist_dv.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist_dv))
                    f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc_dv.append(f0_corr_cyc)
                    caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc_dv.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc_dv.append(lsd_std_cyc)
                    cvlist_cyc_dv.append(np.var(melsp_cyc_rest, axis=0))

                logging.info('write rec to h5')
                rec_dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                # exist_ok avoids the check-then-create race when several
                # decode workers target the same directory.
                os.makedirs(rec_dir, exist_ok=True)
                rec_file = os.path.join(rec_dir, os.path.basename(feat_file))
                logging.info(rec_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(rec_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                # Derived from the rec path (as before), which resolves to a
                # sibling of the rec directory.
                cyc_dir = os.path.join(
                    os.path.dirname(os.path.dirname(rec_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                os.makedirs(cyc_dir, exist_ok=True)
                cyc_file = os.path.join(cyc_dir, os.path.basename(rec_file))
                logging.info(cyc_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(cyc_file, args.string_path, feat_cyc)
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None, \
                    f0rmse_cvlist=None, f0corr_cvlist=None, caprmse_cvlist=None, \
                    f0rmse_cvlist_dv=None, f0corr_cvlist_dv=None, caprmse_cvlist_dv=None, \
                    cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None, mcd_cvlist_cyc=None, \
                    mcdstd_cvlist_cyc=None, cvlist_cyc_dv=None, mcdpow_cvlist_cyc_dv=None, mcdpowstd_cvlist_cyc_dv=None, \
                    mcd_cvlist_cyc_dv=None, mcdstd_cvlist_cyc_dv=None, \
                    f0rmse_cvlist_cyc=None, f0corr_cvlist_cyc=None, caprmse_cvlist_cyc=None, \
                    f0rmse_cvlist_cyc_dv=None, f0corr_cvlist_cyc_dv=None, caprmse_cvlist_cyc_dv=None):
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder_mcep = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim+config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder_mcep)
                model_decoder_mcep = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=config.bi_dec,
                    spkidtr_dim=config.spkidtr_dim,
                    pad_first=True,
                    ar=config.ar_dec)
                logging.info(model_decoder_mcep)
                model_encoder_excit = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim+config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim_e,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder_excit)
                model_decoder_excit = GRU_EXCIT_DECODER(
                    feat_dim=config.lat_dim_e,
                    cap_dim=config.cap_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_lf0,
                    hidden_units=config.hidden_units_lf0,
                    kernel_size=config.kernel_size_lf0,
                    dilation_size=config.dilation_size_lf0,
                    causal_conv=config.causal_conv_lf0,
                    bi=config.bi_lf0,
                    spkidtr_dim=config.spkidtr_dim,
                    pad_first=True,
                    ar=config.ar_f0)
                logging.info(model_decoder_excit)
                model_vq = torch.nn.Embedding(config.ctr_size, config.lat_dim)
                logging.info(model_vq)
                model_encoder_mcep.load_state_dict(torch.load(args.model)["model_encoder_mcep"])
                model_decoder_mcep.load_state_dict(torch.load(args.model)["model_decoder_mcep"])
                model_encoder_excit.load_state_dict(torch.load(args.model)["model_encoder_excit"])
                model_decoder_excit.load_state_dict(torch.load(args.model)["model_decoder_excit"])
                model_vq.load_state_dict(torch.load(args.model)["model_vq"])
                model_encoder_mcep.cuda()
                model_decoder_mcep.cuda()
                model_encoder_excit.cuda()
                model_decoder_excit.cuda()
                model_vq.cuda()
                model_encoder_mcep.eval()
                model_decoder_mcep.eval()
                model_encoder_excit.eval()
                model_decoder_excit.eval()
                model_vq.eval()
                for param in model_encoder_mcep.parameters():
                    param.requires_grad = False
                for param in model_decoder_mcep.parameters():
                    param.requires_grad = False
                for param in model_encoder_excit.parameters():
                    param.requires_grad = False
                for param in model_decoder_excit.parameters():
                    param.requires_grad = False
                for param in model_vq.parameters():
                    param.requires_grad = False
                if config.ar_enc:
                    yz_in = torch.zeros((1, 1, n_spk+config.lat_dim)).cuda()
                    yz_in_e = torch.zeros((1, 1, n_spk+config.lat_dim_e)).cuda()
                if config.ar_dec or config.ar_f0:
                    mean_stats = torch.FloatTensor(read_hdf5(config.stats, "/mean_"+config.string_path.replace("/","")))
                    scale_stats = torch.FloatTensor(read_hdf5(config.stats, "/scale_"+config.string_path.replace("/","")))
                if config.ar_dec:
                    x_in = ((torch.zeros((1, 1, config.mcep_dim))-mean_stats[config.excit_dim:])/scale_stats[config.excit_dim:]).cuda()
                if config.ar_f0:
                    e_in = torch.cat((torch.zeros(1,1,1), (torch.zeros(1,1,1)-mean_stats[1:2])/scale_stats[1:2], \
                                    torch.zeros(1,1,1), (torch.zeros(1,1,config.cap_dim)-mean_stats[3:config.excit_dim])/scale_stats[3:config.excit_dim]), 2).cuda()
            count = 0
            pad_left = (model_encoder_mcep.pad_left + model_decoder_mcep.pad_left)*2
            pad_right = (model_encoder_mcep.pad_right + model_decoder_mcep.pad_right)*2
            outpad_lefts = [None]*3
            outpad_rights = [None]*3
            outpad_lefts[0] = pad_left-model_encoder_mcep.pad_left
            outpad_rights[0] = pad_right-model_encoder_mcep.pad_right
            outpad_lefts[1] = outpad_lefts[0]-model_decoder_mcep.pad_left
            outpad_rights[1] = outpad_rights[0]-model_decoder_mcep.pad_right
            outpad_lefts[2] = outpad_lefts[1]-model_encoder_mcep.pad_left
            outpad_rights[2] = outpad_rights[1]-model_encoder_mcep.pad_right
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat_org = read_hdf5(feat_file, "/feat_mceplf0cap")
                logging.info(feat_org.shape)

                with torch.no_grad():
                    feat = F.pad(torch.FloatTensor(feat_org).cuda().unsqueeze(0).transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2)

                    if config.ar_enc:
                        spk_logits, lat_src, _, _ = model_encoder_mcep(feat, yz_in=yz_in)
                        spk_logits_e, lat_src_e, _, _ = model_encoder_excit(feat, yz_in=yz_in)
                    else:
                        spk_logits, lat_src, _ = model_encoder_mcep(feat)
                        spk_logits_e, lat_src_e, _ = model_encoder_excit(feat)
                    idx_vq = nn_search_batch(lat_src, model_vq.weight)
                    lat_src = model_vq(idx_vq)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[0]:-outpad_rights[0]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[0]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    idx_vq_e = nn_search_batch(lat_src_e, model_vq.weight)
                    lat_src_e = model_vq(idx_vq_e)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[0]:-outpad_rights[0]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[0]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq_e")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:], dim=-1), 1))
                    logging.info('input spkpost_e')
                    if outpad_rights[0] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:], dim=-1), 1))

                    src_code = (torch.ones((1, lat_src.shape[1]))*spk_idx).cuda().long()

                    if config.ar_dec:
                        cvmcep_src, _, _ = model_decoder_mcep(src_code, lat_src, x_in=x_in)
                    else:
                        cvmcep_src, _ = model_decoder_mcep(src_code, lat_src)
                    if config.ar_f0:
                        cvlf0_src, _, _ = model_decoder_excit(src_code, lat_src_e, e_in=e_in)
                    else:
                        cvlf0_src, _ = model_decoder_excit(src_code, lat_src_e)

                    cv_feat = torch.cat((cvlf0_src, cvmcep_src), 2)
                    if config.ar_enc:
                        spk_logits, lat_rec, _, _ = model_encoder_mcep(cv_feat, yz_in=yz_in)
                        spk_logits_e, lat_rec_e, _, _ = model_encoder_excit(cv_feat, yz_in=yz_in)
                    else:
                        spk_logits, lat_rec, _ = model_encoder_mcep(cv_feat)
                        spk_logits_e, lat_rec_e, _ = model_encoder_excit(cv_feat)
                    idx_vq = nn_search_batch(lat_rec, model_vq.weight)
                    lat_rec = model_vq(idx_vq)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[2]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    idx_vq_e = nn_search_batch(lat_rec_e, model_vq.weight)
                    lat_rec_e = model_vq(idx_vq_e)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[2]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq_e")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('rec spkpost')
                    if outpad_rights[2] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[2]:-outpad_rights[2]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[2]:], dim=-1), 1))
                    logging.info('rec spkpost_e')
                    if outpad_rights[2] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[2]:-outpad_rights[2]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[2]:], dim=-1), 1))

                    src_code = (torch.ones((1, lat_rec.shape[1]))*spk_idx).cuda().long()

                    if config.ar_dec:
                        cvmcep_cyc, _, _ = model_decoder_mcep(src_code, lat_rec, x_in=x_in)
                    else:
                        cvmcep_cyc, _ = model_decoder_mcep(src_code, lat_rec)
                    if config.ar_f0:
                        cvlf0_cyc, _, _ = model_decoder_excit(src_code, lat_rec_e, e_in=e_in)
                    else:
                        cvlf0_cyc, _ = model_decoder_excit(src_code, lat_rec_e)

                    if outpad_rights[1] > 0:
                        cvmcep_src = cvmcep_src[:,outpad_lefts[1]:-outpad_rights[1]]
                        cvlf0_src = cvlf0_src[:,outpad_lefts[1]:-outpad_rights[1]]
                    else:
                        cvmcep_src = cvmcep_src[:,outpad_lefts[1]:]
                        cvlf0_src = cvlf0_src[:,outpad_lefts[1]:]

                    feat_rec = torch.cat((torch.round(cvlf0_src[:,:,:1]), cvlf0_src[:,:,1:2], \
                                            torch.round(cvlf0_src[:,:,2:3]), cvlf0_src[:,:,3:], cvmcep_src), \
                                                2)[0].cpu().data.numpy()
                    feat_cyc = torch.cat((torch.round(cvlf0_cyc[:,:,:1]), cvlf0_cyc[:,:,1:2], \
                                            torch.round(cvlf0_cyc[:,:,2:3]), cvlf0_cyc[:,:,3:], cvmcep_cyc), \
                                                2)[0].cpu().data.numpy()

                    cvmcep_src = np.array(cvmcep_src[0].cpu().data.numpy(), dtype=np.float64)
                    cvlf0_src = np.array(cvlf0_src[0].cpu().data.numpy(), dtype=np.float64)

                    cvmcep_cyc = np.array(cvmcep_cyc[0].cpu().data.numpy(), dtype=np.float64)
                    cvlf0_cyc = np.array(cvlf0_cyc[0].cpu().data.numpy(), dtype=np.float64)

                logging.info(cvlf0_src.shape)
                logging.info(cvmcep_src.shape)

                logging.info(cvlf0_cyc.shape)
                logging.info(cvmcep_cyc.shape)

                mcep = np.array(feat_org[:,-model_decoder_mcep.out_dim:])
                f0 = np.array(np.rint(feat_org[:,0])*np.exp(feat_org[:,1]))
                codeap = np.array(np.rint(feat_org[:,2:3])*(-np.exp(feat_org[:,3:feat_org.shape[-1]-model_decoder_mcep.out_dim])))
 
                cvf0_src = np.array(np.rint(cvlf0_src[:,0])*np.exp(cvlf0_src[:,1]))
                cvcodeap_src = np.array(np.rint(cvlf0_src[:,2:3])*(-np.exp(cvlf0_src[:,3:])))
                f0_rmse = np.sqrt(np.mean((cvf0_src-f0)**2))
                logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse))
                cvf0_src_mean = np.mean(cvf0_src)
                f0_mean = np.mean(f0)
                f0_corr = np.sum((cvf0_src-cvf0_src_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_src-cvf0_src_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_rec: %lf' % (f0_corr))

                codeap_rmse = np.sqrt(np.mean((cvcodeap_src-codeap)**2, axis=0))
                for i in range(codeap_rmse.shape[-1]):
                    logging.info('codeap-%d_rmse_rec: %lf dB' % (i+1, codeap_rmse[i]))

                cvf0_cyc = np.array(np.rint(cvlf0_cyc[:,0])*np.exp(cvlf0_cyc[:,1]))
                cvcodeap_cyc = np.array(np.rint(cvlf0_cyc[:,2:3])*(-np.exp(cvlf0_cyc[:,3:])))
                f0_rmse_cyc = np.sqrt(np.mean((cvf0_cyc-f0)**2))
                logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc))
                cvf0_cyc_mean = np.mean(cvf0_cyc)
                f0_mean = np.mean(f0)
                f0_corr_cyc = np.sum((cvf0_cyc-cvf0_cyc_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_cyc-cvf0_cyc_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc))

                codeap_rmse_cyc = np.sqrt(np.mean((cvcodeap_cyc-codeap)**2, axis=0))
                for i in range(codeap_rmse_cyc.shape[-1]):
                    logging.info('codeap-%d_rmse_cyc: %lf dB' % (i+1, codeap_rmse_cyc[i]))

                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_rec: %.6f dB +- %.6f" % (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean_cyc = np.mean(mcdpow_arr)
                mcdpow_std_cyc = np.std(mcdpow_arr)
                mcd_mean_cyc = np.mean(mcd_arr)
                mcd_std_cyc = np.std(mcd_arr)
                logging.info("mcdpow_cyc: %.6f dB +- %.6f" % (mcdpow_mean_cyc, mcdpow_std_cyc))
                logging.info("mcd_cyc: %.6f dB +- %.6f" % (mcd_mean_cyc, mcd_std_cyc))
            
                logging.info('org f0')
                logging.info(f0[10:15])
                logging.info('rec f0')
                logging.info(cvf0_src[10:15])
                logging.info('cyc f0')
                logging.info(cvf0_cyc[10:15])
                logging.info('org cap')
                logging.info(codeap[10:15])
                logging.info('rec cap')
                logging.info(cvcodeap_src[10:15])
                logging.info('cyc cap')
                logging.info(cvcodeap_cyc[10:15])

                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    f0rmse_cvlist.append(f0_rmse)
                    f0corr_cvlist.append(f0_corr)
                    caprmse_cvlist.append(codeap_rmse)
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep_src[:,1:], axis=0))
                    logging.info(len(cvlist))
                    f0rmse_cvlist_cyc.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc.append(f0_corr_cyc)
                    caprmse_cvlist_cyc.append(codeap_rmse_cyc)
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc.append(mcd_std_cyc)
                    cvlist_cyc.append(np.var(cvmcep_cyc[:,1:], axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    f0rmse_cvlist_dv.append(f0_rmse)
                    f0corr_cvlist_dv.append(f0_corr)
                    caprmse_cvlist_dv.append(codeap_rmse)
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep_src[:,1:], axis=0))
                    logging.info(len(cvlist_dv))
                    f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc_dv.append(f0_corr_cyc)
                    caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc)
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc_dv.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc_dv.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc_dv.append(mcd_std_cyc)
                    cvlist_cyc_dv.append(np.var(cvmcep_cyc[:,1:], axis=0))

                logging.info('write rec to h5')
                outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk+"-"+args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
def _configure_logging(verbose, log_path):
    """Set up root logging to `log_path` and mirror records to the console.

    Args:
        verbose: Integer verbosity. Values above 1 select DEBUG, values above
            0 select INFO, anything else selects WARN.
        log_path: File that receives the log; it is truncated on start
            (``filemode='w'``).
    """
    # The most specific threshold must be tested first; otherwise the
    # DEBUG branch can never be reached.
    if verbose > 1:
        level = logging.DEBUG
        quiet = False
    elif verbose > 0:
        level = logging.INFO
        quiet = False
    else:
        level = logging.WARN
        quiet = True
    logging.basicConfig(
        level=level,
        format=
        '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S',
        filemode='w',
        filename=log_path)
    logging.getLogger().addHandler(logging.StreamHandler())
    if quiet:
        logging.warning("logging is disabled.")


def _save_speaker_scatter(embedding, spk_list, colors, out_path):
    """Scatter-plot dimensions 0 and 1 of `embedding` and save the figure.

    Args:
        embedding: NumPy array indexed as ``embedding[0, speaker, dim]``;
            only ``dim`` 0 and 1 are plotted.
        spk_list: Speaker names, one annotation per plotted point.
        colors: Per-point colours passed to ``scatter`` as ``c``.
        out_path: Destination image path handed to ``plt.savefig``.
    """
    x = embedding[0, :, 0]
    y = embedding[0, :, 1]
    fig, ax = plt.subplots()
    ax.scatter(x, y, s=40, c=colors)
    for i, txt in enumerate(spk_list):
        ax.annotate(txt, (x[i], y[i]))
    plt.savefig(out_path)
    plt.close()


def main():
    """Plot the speaker codes produced by both decoders' `spkidtr_conv`.

    Parses ``--model``, ``--config``, ``--outdir`` and ``--verbose``, builds
    the mcep and excitation decoders on CPU, loads their weights from the
    checkpoint, feeds one-hot codes of every speaker in
    ``config.spk_list`` through each decoder's ``spkidtr_conv`` and writes
    two scatter plots (``spect.png`` and ``excit.png``) into ``--outdir``.
    Points are coloured by a hard-coded male/female speaker table.

    Raises:
        SystemExit: with status 1 when a speaker in ``config.spk_list`` is
            missing from the gender table.
    """
    parser = argparse.ArgumentParser()
    # decode setting
    parser.add_argument("--model",
                        required=True,
                        type=str,
                        help="GRU_RNN model file")
    parser.add_argument("--config",
                        required=True,
                        type=str,
                        help="GRU_RNN configure file")
    parser.add_argument("--outdir",
                        required=True,
                        type=str,
                        help="directory to save generated samples")
    # other setting
    parser.add_argument("--verbose",
                        default=VERBOSE,
                        type=int,
                        help="log level")
    args = parser.parse_args()

    # Hide every GPU from CUDA: this script runs on CPU only.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # make sure the output directory exists
    os.makedirs(args.outdir, exist_ok=True)

    # set log level
    _configure_logging(args.verbose, os.path.join(args.outdir, "decode.log"))

    # load config
    # NOTE(review): torch.load unpickles the file; only use trusted configs.
    config = torch.load(args.config)

    spk_list = config.spk_list.split('@')
    n_spk = len(spk_list)

    # The epoch is the second '-'-separated field of the file name stem,
    # so the checkpoint name must contain a '-' before the first '.'.
    model_epoch = os.path.basename(args.model).split('.')[0].split('-')[1]
    logging.info('epoch: ' + model_epoch)

    device = torch.device("cpu")
    # define model and load parameters
    with torch.no_grad():
        model_decoder_mcep = GRU_SPEC_DECODER(
            feat_dim=config.lat_dim,
            out_dim=config.mcep_dim,
            n_spk=n_spk,
            hidden_layers=config.hidden_layers_dec,
            hidden_units=config.hidden_units_dec,
            kernel_size=config.kernel_size_dec,
            dilation_size=config.dilation_size_dec,
            causal_conv=config.causal_conv_dec,
            bi=config.bi_dec,
            spkidtr_dim=config.spkidtr_dim,
            pad_first=True,
            ar=config.ar_dec)
        logging.info(model_decoder_mcep)
        model_decoder_excit = GRU_EXCIT_DECODER(
            feat_dim=config.lat_dim,
            cap_dim=config.cap_dim,
            n_spk=n_spk,
            hidden_layers=config.hidden_layers_dec,
            hidden_units=config.hidden_units_dec,
            kernel_size=config.kernel_size_dec,
            dilation_size=config.dilation_size_dec,
            causal_conv=config.causal_conv_dec,
            bi=config.bi_dec,
            spkidtr_dim=config.spkidtr_dim,
            pad_first=True,
            ar=config.ar_dec)
        logging.info(model_decoder_excit)

        # Deserialize the checkpoint once and reuse it for both decoders.
        checkpoint = torch.load(args.model, map_location=device)
        model_decoder_mcep.load_state_dict(checkpoint["model_decoder_mcep"])
        model_decoder_excit.load_state_dict(checkpoint["model_decoder_excit"])
        model_decoder_mcep.eval()
        model_decoder_excit.eval()
        for param in model_decoder_mcep.parameters():
            param.requires_grad = False
        for param in model_decoder_excit.parameters():
            param.requires_grad = False

        # One index per speaker, with a leading batch axis of size 1.
        feat = torch.LongTensor(np.arange(n_spk)).unsqueeze(0)
        logging.info(feat)

        logging.info(spk_list)

        # colormap[0] ('b') marks male speakers, colormap[1] ('r') female.
        colormap = np.array(['b', 'r'])
        male = ['bdl', 'p237', 'p245', 'p251', 'p252', 'p259', 'p274', 'p304', 'p311', 'p326', 'p345', 'p360', 'p363', \
                    'SEM1', 'SEM2', 'TFM1', 'TGM1', 'TMM1', 'TEM1', 'TEM2', \
                        'VCC2SM1', 'VCC2SM2', 'VCC2SM3', 'VCC2TM1', 'VCC2TM2', 'VCC2SM4']
        female = ['slt', 'p231', 'p238', 'p248', 'p253', 'p264', 'p265', 'p266', 'p276', 'p305', 'p308', 'p318', 'p335', \
                    'SEF1', 'SEF2', 'TEF1', 'TEF2', 'TFF1', 'TGF1', 'TMF1', \
                        'VCC2SF1', 'VCC2SF2', 'VCC2SF3', 'VCC2TF1', 'VCC2TF2', 'VCC2SF4']
        gender = []
        for spk in spk_list:
            if spk in male:
                gender.append(0)
            elif spk in female:
                gender.append(1)
            else:
                # Log at ERROR so the message survives the default WARN
                # level, and exit non-zero so callers can detect the failure.
                logging.error('error %s not in gender list' % (spk))
                raise SystemExit(1)

        # One-hot speaker codes, transposed so the speaker-class axis is
        # axis 1 before being passed to spkidtr_conv; shared by both decoders.
        spk_onehot = F.one_hot(feat, num_classes=n_spk).float().transpose(1, 2)

        z = model_decoder_mcep.spkidtr_conv(spk_onehot).transpose(1, 2)
        logging.info(z)

        logging.info(args.outdir)

        plt.rcParams["figure.figsize"] = (11.25, 11.25)  #1080x1080

        z = z.data.numpy()
        logging.info(z.shape)
        _save_speaker_scatter(z, spk_list, colormap[gender],
                              os.path.join(args.outdir, 'spect.png'))

        z_e = model_decoder_excit.spkidtr_conv(spk_onehot).transpose(1, 2)
        logging.info(z_e)

        z_e = z_e.data.numpy()
        _save_speaker_scatter(z_e, spk_list, colormap[gender],
                              os.path.join(args.outdir, 'excit.png'))