Example #1
0
def main():
    """Compute per-dimension mean/scale statistics over a list of HDF5
    feature files and write them to an HDF5 stats file.

    The first feature dimension (index 0) is treated as the U/V flag and is
    excluded from standardization (mean=0, scale=1).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--feats",
                        default=None,
                        required=True,
                        help="name of the list of hdf5 files")
    parser.add_argument("--stats",
                        default=None,
                        required=True,
                        help="filename of hdf5 format")

    args = parser.parse_args()

    # read list and define scaler
    filenames = read_txt(args.feats)
    scaler = StandardScaler()
    print("number of training utterances =", len(filenames))
    if not filenames:
        # fail early: with no files the scaler is never fitted and the
        # original code would crash later with an unbound `feat`
        raise ValueError("no training utterances listed in %s" % args.feats)

    # process over all of data (column 0 is the u/v flag, not standardized)
    for filename in filenames:
        feat = read_hdf5(filename, "/feat_org")
        scaler.partial_fit(feat[:, 1:])

    # add uv term: identity transform for dimension 0. Derive the
    # dimensionality from the fitted scaler instead of the loop variable.
    n_dims = scaler.mean_.shape[0] + 1
    mean = np.zeros(n_dims)
    scale = np.ones(n_dims)
    mean[1:] = scaler.mean_
    scale[1:] = scaler.scale_

    # write to hdf5
    write_hdf5(args.stats, "/mean", mean)
    write_hdf5(args.stats, "/scale", scale)
Example #2
0
def melcepstrum_extract(wav_list, args):
    """EXTRACT MEL CEPSTRUM

    For each wav file: load it, optionally high-pass filter it, extract an
    STFT-based mel cepstrum, and store it under "/mcep" in a per-utterance
    HDF5 file. Optionally overwrites the wav with the filtered signal.

    Args:
        wav_list: list of wav file paths to process.
        args: namespace with fs, fftl, shiftms, mcep_dim, mcep_alpha,
            highpass_cutoff, hdf5dir, wavdir, save_wav.
    """
    # define feature extractor
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" %
                     (wav_name, i + 1, len(wav_list)))

        # load wavfile and apply low cut filter
        fs, x = wavfile.read(wav_name)
        if x.dtype != np.int16:
            # logging.warn is deprecated; warning is the supported spelling
            logging.warning("wav file format is not 16 bit PCM.")
        x = np.array(x, dtype=np.float64)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # check sampling frequency
        if not fs == args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features (frame shift in samples from shift in ms)
        shiftl = int(args.shiftms * fs * 0.001)
        mcep = stft_mcep(x, args.fftl, shiftl, args.mcep_dim, args.mcep_alpha)

        # save to hdf5
        hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
            ".wav", ".h5")
        write_hdf5(hdf5name, "/mcep", np.float32(mcep))

        # overwrite wav file with the high-pass filtered signal
        if args.highpass_cutoff != 0 and args.save_wav:
            wavfile.write(args.wavdir + "/" + os.path.basename(wav_name), fs,
                          np.int16(x))
Example #3
0
def world_feature_extract(wav_list, args):
    """EXTRACT WORLD FEATURE VECTOR"""
    # set up the WORLD analyzer once for the whole list
    extractor = FeatureExtractor(analyzer="world",
                                 fs=args.fs,
                                 shiftms=args.shiftms,
                                 minf0=args.minf0,
                                 maxf0=args.maxf0,
                                 fftl=args.fftl)

    n_files = len(wav_list)
    for idx, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" %
                     (wav_name, idx + 1, n_files))

        # read waveform, optionally removing low-frequency content
        fs, x = wavfile.read(wav_name)
        x = np.array(x, dtype=np.float32)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # abort when the file's rate disagrees with the configured one
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # run WORLD analysis and derive the individual feature streams
        f0, _, _ = extractor.analyze(x)
        uv, cont_f0 = extractor_uv = convert_continuos_f0(f0)
        cont_f0_lpf = low_pass_filter(cont_f0,
                                      int(1.0 / (args.shiftms * 0.001)),
                                      cutoff=20)
        codeap = extractor.codeap()
        mcep = extractor.mcep(dim=args.mcep_dim, alpha=args.mcep_alpha)

        # stack [uv, lpf continuous f0, mcep, codeap] frame-wise
        uv = np.expand_dims(uv, axis=-1)
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

        # persist the features in a per-utterance HDF5 file
        hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
            ".wav", ".h5")
        write_hdf5(hdf5name, "/feat_org", feats)
        if args.save_extended:
            # also store a copy upsampled to waveform time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            extended = extend_time(feats, upsampling_factor)
            write_hdf5(hdf5name, "/feat", extended.astype(np.float32))

        # the filtered waveform replaces the original file
        if args.highpass_cutoff != 0:
            wavfile.write(args.wavdir + "/" + os.path.basename(wav_name), fs,
                          np.int16(x))
Example #4
0
    def feature_extract(wav_list):
        """Extract WORLD features for each wav file and save them to HDF5.

        NOTE(review): this is a closure — `args` and `feature_extractor`
        come from the enclosing scope; confirm their definitions against
        the surrounding function.
        """
        for wav_name in wav_list:
            # load wavfile and apply low cut filter
            fs, x = wavfile.read(wav_name)
            x = np.array(x, dtype=np.float32)
            if args.highpass_cutoff != 0:
                x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

            # check sampling frequency
            if not fs == args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # extract features (f0 plus spectral/aperiodicity streams)
            f0, spc, ap = feature_extractor.analyze(x)
            uv, cont_f0 = convert_continuos_f0(f0)
            cont_f0_lpf = low_pass_filter(cont_f0,
                                          int(1.0 / (args.shiftms * 0.001)),
                                          cutoff=20)
            codeap = feature_extractor.codeap()
            mcep = feature_extractor.mcep(dim=args.mcep_dim,
                                          alpha=args.mcep_alpha)

            # concatenate frame-wise: [uv, lpf continuous f0, mcep, codeap]
            cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
            uv = np.expand_dims(uv, axis=-1)
            feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

            # extend time resolution (frame shift in samples per frame)
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)

            # save to hdf5: both frame-rate and extended-rate features
            feats_extended = feats_extended.astype(np.float32)
            hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
                ".wav", ".h5")
            write_hdf5(hdf5name, "/feat_org", feats)
            write_hdf5(hdf5name, "/feat", feats_extended)

            # overwrite wav file with the high-pass filtered signal
            if args.highpass_cutoff != 0:
                wavfile.write(args.wavdir + "/" + os.path.basename(wav_name),
                              fs, np.int16(x))
Example #5
0
def calc_stats(file_list, args):
    """CALCULATE STATISTICS"""
    scaler = StandardScaler()

    # accumulate sufficient statistics file by file
    n_files = len(file_list)
    for idx, path in enumerate(file_list):
        logging.info("now processing %s (%d/%d)" % (path, idx + 1, n_files))
        scaler.partial_fit(read_hdf5(path, "/" + args.feature_type))

    # add uv term: for WORLD features, dim 0 is the u/v flag and must stay
    # unnormalized
    mean = scaler.mean_
    scale = scaler.scale_
    if args.feature_type == "world":
        mean[0], scale[0] = 0.0, 1.0

    # write to hdf5
    write_hdf5(args.stats, "/" + args.feature_type + "/mean", np.float32(mean))
    write_hdf5(args.stats, "/" + args.feature_type + "/scale", np.float32(scale))
Example #6
0
def calc_stats(file_list, args):
    """CALCULATE STATISTICS

    Fit a StandardScaler over every feature file — excluding the u/v flag
    in column 0 — and store the resulting mean/scale vectors in the stats
    HDF5 file under "/<feature_type>/mean" and "/<feature_type>/scale".
    """
    scaler = StandardScaler()

    # process over all of data (column 0 is the u/v flag, not standardized)
    for i, filename in enumerate(file_list):
        logging.info("now processing %s (%d/%d)" %
                     (filename, i + 1, len(file_list)))
        feat = read_hdf5(filename, "/%s" % args.feature_type)
        scaler.partial_fit(feat[:, 1:])

    # add uv term: identity transform for the first dimension. Derive the
    # dimensionality from the fitted scaler instead of the loop variable
    # `feat`, which would be unbound for an empty file_list.
    n_dims = scaler.mean_.shape[0] + 1
    mean = np.zeros(n_dims)
    scale = np.ones(n_dims)
    mean[1:] = scaler.mean_
    scale[1:] = scaler.scale_

    # write to hdf5
    write_hdf5(args.stats, "/%s/mean" % args.feature_type, mean)
    write_hdf5(args.stats, "/%s/scale" % args.feature_type, scale)
def melspectrogram_extract(wav_list, args):
    """EXTRACT MEL SPECTROGRAM

    For each wav file: load it, optionally high-pass filter it, compute a
    log10 mel spectrogram, and store it under "/melspc" in a per-utterance
    HDF5 file. Optionally overwrites the wav with the filtered signal.

    Args:
        wav_list: list of wav file paths to process.
        args: namespace with fs, fftl, shiftms, mspc_dim, highpass_cutoff,
            hdf5dir, wavdir, save_wav.
    """
    # define feature extractor
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" %
                     (wav_name, i + 1, len(wav_list)))

        # load wavfile and apply low cut filter
        fs, x = wavfile.read(wav_name)
        if x.dtype != np.int16:
            # logging.warn is deprecated; warning is the supported spelling
            logging.warning("wav file format is not 16 bit PCM.")
        x = np.array(x, dtype=np.float64)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # check sampling frequency
        if not fs == args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features: normalize int16 range to [-1, 1), magnitude
        # (power=1.0) mel spectrogram, then log10 with a floor of EPS.
        # keyword args (y=, sr=) are required by modern librosa versions
        x_norm = x / (np.iinfo(np.int16).max + 1)
        shiftl = int(args.shiftms * fs * 0.001)
        mspc = librosa.feature.melspectrogram(y=x_norm,
                                              sr=fs,
                                              n_fft=args.fftl,
                                              hop_length=shiftl,
                                              n_mels=args.mspc_dim,
                                              power=1.0)
        mspc = np.log10(np.maximum(EPS, mspc.T))

        # save to hdf5
        hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
            ".wav", ".h5")
        write_hdf5(hdf5name, "/melspc", np.float32(mspc))

        # overwrite wav file with the high-pass filtered signal
        if args.highpass_cutoff != 0 and args.save_wav:
            wavfile.write(args.wavdir + "/" + os.path.basename(wav_name), fs,
                          np.int16(x))
Example #8
0
def save_DRIVE_to_h5py(train_test, num, config):
    """Pack the DRIVE retina dataset (images/labels/masks) into HDF5 files.

    Args:
        train_test: either "train" or "test"; selects paths and name
            patterns from the [DRIVE] config section.
        num: number of samples expected; the arrays are pre-allocated to
            this size.
        config: ConfigParser-like object with a 'DRIVE' section.

    Raises:
        ValueError: if train_test is neither "train" nor "test".
    """
    images_path = config.get('DRIVE', train_test + '_images_path')
    labels_path = config.get('DRIVE', train_test + '_labels_path')
    masks_path = config.get('DRIVE', train_test + '_masks_path')
    height = int(config.get('DRIVE', 'height'))
    width = int(config.get('DRIVE', 'width'))

    images = np.empty((num, height, width, 3), dtype=np.float32)
    labels = np.empty((num, height, width, 1), dtype=np.float32)
    masks = np.empty((num, height, width, 1), dtype=np.float32)

    # sort for a deterministic file -> index mapping; os.listdir order is
    # filesystem-dependent
    files = sorted(os.listdir(images_path))
    for i, fname in enumerate(files):
        # read i-th image.
        images[i] = np.asarray(Image.open(images_path + fname))

        # read corresponding label.
        label_name = fname[0:2] + "_manual1.gif"
        labels[i] = np.asarray(Image.open(labels_path + label_name)).reshape(
            (height, width, 1))

        # read corresponding mask (name pattern depends on the split).
        if train_test == "train":
            mask_name = fname[0:2] + "_training_mask.gif"
        elif train_test == "test":
            mask_name = fname[0:2] + "_test_mask.gif"
        else:
            raise ValueError("train_test must be 'train' or 'test', got %r" %
                             train_test)
        masks[i] = np.asarray(Image.open(masks_path + mask_name)).reshape(
            (height, width, 1))

    # check data: print shapes, dtypes and value ranges for a quick sanity pass
    print('DRIVE', train_test)
    print('images', images.shape, images.dtype, np.min(images), np.max(images))
    print('labels', labels.shape, labels.dtype, np.min(labels), np.max(labels))
    print('masks', masks.shape, masks.dtype, np.min(masks), np.max(masks))

    save_path = config.get('DRIVE', 'h5py_save_path')
    # os.makedirs replaces the shell 'mkdir' call: no subprocess, creates
    # intermediate directories, and is a no-op when the path exists
    os.makedirs(save_path, exist_ok=True)
    write_hdf5(images, save_path + train_test + '_images' + '.hdf5')
    write_hdf5(labels, save_path + train_test + '_labels' + '.hdf5')
    write_hdf5(masks, save_path + train_test + '_masks' + '.hdf5')
Example #9
0
def save_IVDM_to_h5py(train_test, num, config):
    """Pack the IVDM lumbar-spine dataset (images/labels/masks) into HDF5.

    Args:
        train_test: either "train" or "test"; selects paths and name
            patterns from the [IVDM] config section.
        num: number of samples expected; the arrays are pre-allocated to
            this size.
        config: ConfigParser-like object with an 'IVDM' section.

    Raises:
        ValueError: if train_test is neither "train" nor "test".
    """
    images_path = config.get('IVDM', train_test + '_images_path')
    labels_path = config.get('IVDM', train_test + '_labels_path')
    masks_path = config.get('IVDM', train_test + '_masks_path')
    height = int(config.get('IVDM', 'height'))
    width = int(config.get('IVDM', 'width'))
    # pre-allocate empty arrays of the expected shape and type
    images = np.empty((num, height, width, 1), dtype=np.float32)
    labels = np.empty((num, height, width, 1), dtype=np.float32)
    masks = np.empty((num, height, width, 1), dtype=np.float32)

    # sort for a deterministic file -> index mapping; os.listdir order is
    # filesystem-dependent
    files = sorted(os.listdir(images_path))
    for i, fname in enumerate(files):
        # read the i-th lumbar image
        images[i] = np.asarray(Image.open(images_path + fname)).reshape(
            (height, width, 1))

        # read the corresponding label
        label_name = fname[0:3] + "_manual.png"
        labels[i] = np.asarray(Image.open(labels_path + label_name)).reshape(
            (height, width, 1))

        # read the corresponding mask (name pattern depends on the split)
        if train_test == "train":
            mask_name = fname[0:3] + "_training_mask.png"
        elif train_test == "test":
            mask_name = fname[0:3] + "_test_mask.png"
        else:
            raise ValueError("train_test must be 'train' or 'test', got %r" %
                             train_test)
        masks[i] = np.asarray(Image.open(masks_path + mask_name)).reshape(
            (height, width, 1))

    # print shapes, dtypes and value ranges for a quick sanity pass
    print('IVDM', train_test)
    print('images', images.shape, images.dtype, np.min(images), np.max(images))
    print('labels', labels.shape, labels.dtype, np.min(labels), np.max(labels))
    print('masks', masks.shape, masks.dtype, np.min(masks), np.max(masks))

    # save as .hdf5 files
    save_path = config.get('IVDM', 'h5py_save_path')
    # os.makedirs replaces the shell 'mkdir' call: no subprocess, creates
    # intermediate directories, and is a no-op when the path exists
    os.makedirs(save_path, exist_ok=True)
    write_hdf5(images, save_path + train_test + '_images' + '.hdf5')
    write_hdf5(labels, save_path + train_test + '_labels' + '.hdf5')
    write_hdf5(masks, save_path + train_test + '_masks' + '.hdf5')
Example #10
0
def save_CHASEDB_to_h5py(train_test, num, config):
    """Pack the CHASE_DB1 retina dataset (images/labels/masks) into HDF5.

    Args:
        train_test: either "train" or "test"; selects paths from the
            [CHASEDB] config section.
        num: number of samples expected; the arrays are pre-allocated to
            this size.
        config: ConfigParser-like object with a 'CHASEDB' section.
    """
    images_path = config.get('CHASEDB', train_test + '_images_path')
    labels_path = config.get('CHASEDB', train_test + '_labels_path')
    masks_path = config.get('CHASEDB', train_test + '_masks_path')
    height = int(config.get('CHASEDB', 'height'))
    width = int(config.get('CHASEDB', 'width'))

    images = np.empty((num, height, width, 3), dtype=np.float32)
    labels = np.empty((num, height, width, 1), dtype=np.float32)
    masks = np.empty((num, height, width, 1), dtype=np.float32)

    # sort for a deterministic file -> index mapping; os.listdir order is
    # filesystem-dependent
    files = sorted(os.listdir(images_path))
    for i, fname in enumerate(files):
        # read i-th image.
        images[i] = np.asarray(Image.open(images_path + fname))

        # read corresponding label.
        label_name = fname[:9] + "_1stHO.png"
        labels[i] = np.asarray(Image.open(labels_path + label_name)).reshape(
            (height, width, 1))

        # read corresponding mask.
        mask_name = 'mask_' + fname[6:9] + '.png'
        masks[i] = np.asarray(Image.open(masks_path + mask_name)).reshape(
            (height, width, 1))

    # check data: print shapes, dtypes and value ranges for a quick sanity pass
    print('CHASEDB', train_test)
    print('images', images.shape, images.dtype, np.min(images), np.max(images))
    print('labels', labels.shape, labels.dtype, np.min(labels), np.max(labels))
    print('masks', masks.shape, masks.dtype, np.min(masks), np.max(masks))

    save_path = config.get('CHASEDB', 'h5py_save_path')
    # os.makedirs replaces the shell 'mkdir' call: no subprocess, creates
    # intermediate directories, and is a no-op when the path exists
    os.makedirs(save_path, exist_ok=True)
    write_hdf5(images, save_path + train_test + '_images' + '.hdf5')
    write_hdf5(labels, save_path + train_test + '_labels' + '.hdf5')
    write_hdf5(masks, save_path + train_test + '_masks' + '.hdf5')
                rotations[n, k, 0] = (
                    np.random.random() *
                    (config['render_max_x_rotation'] +
                     abs(config['render_min_x_rotation'])) -
                    abs(config['render_min_x_rotation'])) / 180. * math.pi
                rotations[n, k, 1] = (
                    np.random.random() *
                    (config['render_max_y_rotation'] +
                     abs(config['render_min_y_rotation'])) -
                    abs(config['render_min_y_rotation'])) / 180. * math.pi

                mesh.rotate(rotations[n, k])
                mesh.translate(mesh_center)

                np_vertices = mesh.vertices.astype(np.float64)
                np_faces = mesh.faces.astype(np.float64)
                np_faces += 1

                depth_map, mask, img = pyrender.render(np_vertices.T.copy(),
                                                       np_faces.T.copy(),
                                                       intrinsics, znf, size)
                depth_maps[n][k] = depth_map

            print('[Data] rendered %s %d/%d' % (off_files[n],
                                                (n + 1), n_files))

        utils.write_hdf5(angles_file, rotations)
        print('[Data] wrote %s' % angles_file)
        utils.write_hdf5(depth_file, depth_maps)
        print('[Data] wrote %s' % depth_file)
                waited = True
                print('[Data] waiting for %s' % depth_file)
                time.sleep(10)

            # Wait for synchronization.
            if waited:
                time.sleep(10)

            try:
                # Sometimes signature of HDF5 files is still not available.
                depths = utils.read_hdf5(depth_file)
            except IOError:
                print('[Data] could not read %s' % depth_file)
                time.sleep(5)

            # Try again, now it can really fail if file is not there.
            depths = utils.read_hdf5(depth_file)

            timer.reset()
            tsdf = fusion(depths, Rs)
            tsdf = tsdf[0]

            utils.write_hdf5(tsdf_file, tsdf)
            print('[Data] wrote %s (%f seconds)' %
                  (tsdf_file, timer.elapsed()))

            vertices, triangles = mcubes.marching_cubes(-tsdf, 0)
            vertices /= config['watertight_fusion']['resolution']
            vertices -= 0.5
            mcubes.export_off(vertices, triangles, off_file)
            print('[Data] wrote %s (%f seconds)' % (off_file, timer.elapsed()))
Example #13
0
    def decode_RNN(wav_list, gpu, cvlist=None, cvlist_src=None, \
        mcd_cvlist_src=None, mcdstd_cvlist_src=None, mcdpow_cvlist_src=None, mcdpowstd_cvlist_src=None):
        """Decode each wav with the VAE encoder/decoder on the given GPU.

        For every input file the function extracts WORLD features, encodes
        them to a latent sequence, decodes both a source-speaker and a
        target-speaker mel-cepstrum, writes the converted features to HDF5,
        accumulates MCD statistics into the provided lists, and synthesizes
        both a converted and an analysis-synthesis waveform.

        NOTE(review): this is a closure — `args`, `config`, `n_spk`,
        `src_code_idx`, `trg_code_idx`, `model_epoch` and the f0 range
        statistics come from the enclosing scope; confirm against the
        caller.
        """
        with torch.cuda.device(gpu):
            # target-side feature statistics past the excitation dims
            # (config.stdim) — used to build the decoder's initial input
            mean_trg = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0")[config.stdim:]).cuda()
            std_trg = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info(config)
            logging.info("model")
            with torch.no_grad():
                model_encoder = GRU_RNN_STOCHASTIC(
                    in_dim=config.in_dim,
                    out_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers,
                    hidden_units=config.hidden_units,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    arparam=config.arparam,
                    spk_dim=n_spk,
                    causal_conv=config.causal_conv,
                    scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + n_spk,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size_dec,
                                        dilation_size=config.dilation_size_dec,
                                        causal_conv=config.causal_conv,
                                        scale_in_flag=False)
                logging.info(model_encoder)
                logging.info(model_decoder)
                model_encoder.load_state_dict(
                    torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(
                    torch.load(args.model)["model_decoder"])
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                # inference only: freeze all parameters
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                # initial autoregressive input; twice the latent dims when
                # the encoder predicts AR parameters
                if config.arparam:
                    init_pp = np.zeros((1, 1, config.lat_dim * 2 + n_spk))
                else:
                    init_pp = np.zeros((1, 1, config.lat_dim + n_spk))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                # decoder initial input: standardized zero frame
                y_in_src = y_in_trg = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_trg) / std_trg, 0), 0)
            fs = args.fs
            fft_size = args.fftl
            # decoder output is [power, mcep...], so mcep order is out_dim-1
            mcep_dim = model_decoder.out_dim - 1
            for wav_file in wav_list:
                # convert mcep
                feat_file = os.path.join(
                    args.h5outdir,
                    os.path.basename(wav_file).replace(".wav", ".h5"))
                logging.info("cvmcep " + feat_file + " " + wav_file)

                fs, x = read_wav(wav_file, cutoff=70)

                # WORLD analysis within the configured f0 search range
                time_axis, f0, sp, ap = analyze_range(x, fs=fs, minf0=args.minf0, maxf0=args.maxf0, \
                                                        fperiod=args.shiftms, fftl=args.fftl)
                logging.info(sp.shape)

                mcep = ps.sp2mc(sp, mcep_dim, args.mcep_alpha)
                logging.info(mcep.shape)
                codeap = pw.code_aperiodicity(ap, fs)
                logging.info(codeap.shape)

                # indices of frames above the power threshold, used later
                # to restrict the MCD computation to speech frames
                npow = spc2npow(sp)
                logging.info(npow.shape)
                _, spcidx = extfrm(mcep, npow, power_threshold=args.pow)
                spcidx = spcidx[0]
                logging.info(spcidx.shape)

                # build the encoder input: [uv, log continuous f0, codeap, mcep]
                uv, contf0 = convert_continuos_f0(np.array(f0))
                uv = np.expand_dims(uv, axis=-1)
                logging.info(uv.shape)
                cont_f0_lpf = low_pass_filter(contf0,
                                              int(1.0 /
                                                  (args.shiftms * 0.001)),
                                              cutoff=LP_CUTOFF)
                logcontf0 = np.expand_dims(np.log(cont_f0_lpf), axis=-1)
                logging.info(logcontf0.shape)
                feat = np.c_[uv, logcontf0, codeap, mcep]
                logging.info(feat.shape)

                logging.info("generate")
                with torch.no_grad():
                    lat_feat_src, _, _, _, _ = \
                        model_encoder(torch.FloatTensor(feat).cuda(), y_in_pp, sampling=False)

                    # one-hot speaker codes for source and target
                    src_code = np.zeros((lat_feat_src.shape[0], n_spk))
                    src_code[:, src_code_idx] = 1
                    src_code = torch.FloatTensor(src_code).cuda()

                    trg_code = np.zeros((lat_feat_src.shape[0], n_spk))
                    trg_code[:, trg_code_idx] = 1
                    trg_code = torch.FloatTensor(trg_code).cuda()

                    # decode with the source code (reconstruction) ...
                    cvmcep_src, _, _ = model_decoder(
                        torch.cat((src_code, lat_feat_src), 1), y_in_src)
                    cvmcep_src = np.array(cvmcep_src.cpu().data.numpy(),
                                          dtype=np.float64)

                    # ... and with the target code (conversion)
                    cvmcep, _, _ = model_decoder(
                        torch.cat((trg_code, lat_feat_src), 1), y_in_trg)
                    cvmcep = np.array(cvmcep.cpu().data.numpy(),
                                      dtype=np.float64)

                logging.info(lat_feat_src.shape)
                logging.info(cvmcep_src.shape)
                logging.info(cvmcep.shape)

                # convert f0 statistics from source to target range and
                # rebuild the excitation features for the converted voice
                cvf0 = convert_f0(f0, f0_range_mean_src, f0_range_std_src,
                                  f0_range_mean_trg, f0_range_std_trg)
                uv_cv, contf0_cv = convert_continuos_f0(np.array(cvf0))
                uv_cv = np.expand_dims(uv_cv, axis=-1)
                logging.info(uv_cv.shape)
                cont_f0_lpf_cv = low_pass_filter(contf0_cv,
                                                 int(1.0 /
                                                     (args.shiftms * 0.001)),
                                                 cutoff=LP_CUTOFF)
                logcontf0_cv = np.expand_dims(np.log(cont_f0_lpf_cv), axis=-1)
                logging.info(logcontf0_cv.shape)
                feat_cv = np.c_[uv_cv, logcontf0_cv, codeap]
                logging.info(feat_cv.shape)

                # store the converted features and the mcep variance
                feat_cvmcep = np.c_[feat_cv, cvmcep]
                logging.info(feat_cvmcep.shape)
                write_path = '/feat_cvmcep_cycvae-' + model_epoch
                logging.info(feat_file + ' ' + write_path)
                write_hdf5(feat_file, write_path, feat_cvmcep)
                cvlist.append(np.var(cvmcep[:, 1:], axis=0))

                # MCD between original and reconstructed mcep on speech
                # frames, with (mcdpow) and without (mcd) the 0th (power) dim
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),:], dtype=np.float64), \
                                                np.array(cvmcep_src[np.array(spcidx),:], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),1:], dtype=np.float64), \
                                            np.array(cvmcep_src[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_src_cv: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_src_cv: %.6f dB +- %.6f" %
                             (mcd_mean, mcd_std))
                mcdpow_cvlist_src.append(mcdpow_mean)
                mcdpowstd_cvlist_src.append(mcdpow_std)
                mcd_cvlist_src.append(mcd_mean)
                mcdstd_cvlist_src.append(mcd_std)
                cvlist_src.append(np.var(cvmcep_src[:, 1:], axis=0))

                # vocoder synthesis of the converted voice
                logging.info("synth voco")
                cvsp = ps.mc2sp(np.ascontiguousarray(cvmcep), args.mcep_alpha,
                                fft_size)
                logging.info(cvsp.shape)
                wav = np.clip(
                    pw.synthesize(cvf0,
                                  cvsp,
                                  ap,
                                  fs,
                                  frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(
                    args.outdir,
                    os.path.basename(wav_file).replace(".wav", "_cv.wav"))
                sf.write(wavpath, wav, fs, 'PCM_16')
                logging.info(wavpath)

                # analysis-synthesis baseline from the unmodified features
                logging.info("synth anasyn")
                wav = np.clip(
                    pw.synthesize(f0, sp, ap, fs, frame_period=args.shiftms),
                    -1, 1)
                wavpath = os.path.join(
                    args.outdir,
                    os.path.basename(wav_file).replace(".wav", "_anasyn.wav"))
                sf.write(wavpath, wav, fs, 'PCM_16')
                logging.info(wavpath)
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None, \
                    f0rmse_cvlist=None, f0corr_cvlist=None, caprmse_cvlist=None, \
                    f0rmse_cvlist_dv=None, f0corr_cvlist_dv=None, caprmse_cvlist_dv=None, \
                    cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None, mcd_cvlist_cyc=None, \
                    mcdstd_cvlist_cyc=None, cvlist_cyc_dv=None, mcdpow_cvlist_cyc_dv=None, mcdpowstd_cvlist_cyc_dv=None, \
                    mcd_cvlist_cyc_dv=None, mcdstd_cvlist_cyc_dv=None, \
                    f0rmse_cvlist_cyc=None, f0corr_cvlist_cyc=None, caprmse_cvlist_cyc=None, \
                    f0rmse_cvlist_cyc_dv=None, f0corr_cvlist_cyc_dv=None, caprmse_cvlist_cyc_dv=None):
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder_mcep = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim+config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder_mcep)
                model_decoder_mcep = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=config.bi_dec,
                    spkidtr_dim=config.spkidtr_dim,
                    pad_first=True,
                    ar=config.ar_dec)
                logging.info(model_decoder_mcep)
                model_encoder_excit = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim+config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim_e,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder_excit)
                model_decoder_excit = GRU_EXCIT_DECODER(
                    feat_dim=config.lat_dim_e,
                    cap_dim=config.cap_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_lf0,
                    hidden_units=config.hidden_units_lf0,
                    kernel_size=config.kernel_size_lf0,
                    dilation_size=config.dilation_size_lf0,
                    causal_conv=config.causal_conv_lf0,
                    bi=config.bi_lf0,
                    spkidtr_dim=config.spkidtr_dim,
                    pad_first=True,
                    ar=config.ar_f0)
                logging.info(model_decoder_excit)
                model_vq = torch.nn.Embedding(config.ctr_size, config.lat_dim)
                logging.info(model_vq)
                model_encoder_mcep.load_state_dict(torch.load(args.model)["model_encoder_mcep"])
                model_decoder_mcep.load_state_dict(torch.load(args.model)["model_decoder_mcep"])
                model_encoder_excit.load_state_dict(torch.load(args.model)["model_encoder_excit"])
                model_decoder_excit.load_state_dict(torch.load(args.model)["model_decoder_excit"])
                model_vq.load_state_dict(torch.load(args.model)["model_vq"])
                model_encoder_mcep.cuda()
                model_decoder_mcep.cuda()
                model_encoder_excit.cuda()
                model_decoder_excit.cuda()
                model_vq.cuda()
                model_encoder_mcep.eval()
                model_decoder_mcep.eval()
                model_encoder_excit.eval()
                model_decoder_excit.eval()
                model_vq.eval()
                for param in model_encoder_mcep.parameters():
                    param.requires_grad = False
                for param in model_decoder_mcep.parameters():
                    param.requires_grad = False
                for param in model_encoder_excit.parameters():
                    param.requires_grad = False
                for param in model_decoder_excit.parameters():
                    param.requires_grad = False
                for param in model_vq.parameters():
                    param.requires_grad = False
                if config.ar_enc:
                    yz_in = torch.zeros((1, 1, n_spk+config.lat_dim)).cuda()
                    yz_in_e = torch.zeros((1, 1, n_spk+config.lat_dim_e)).cuda()
                if config.ar_dec or config.ar_f0:
                    mean_stats = torch.FloatTensor(read_hdf5(config.stats, "/mean_"+config.string_path.replace("/","")))
                    scale_stats = torch.FloatTensor(read_hdf5(config.stats, "/scale_"+config.string_path.replace("/","")))
                if config.ar_dec:
                    x_in = ((torch.zeros((1, 1, config.mcep_dim))-mean_stats[config.excit_dim:])/scale_stats[config.excit_dim:]).cuda()
                if config.ar_f0:
                    e_in = torch.cat((torch.zeros(1,1,1), (torch.zeros(1,1,1)-mean_stats[1:2])/scale_stats[1:2], \
                                    torch.zeros(1,1,1), (torch.zeros(1,1,config.cap_dim)-mean_stats[3:config.excit_dim])/scale_stats[3:config.excit_dim]), 2).cuda()
            count = 0
            pad_left = (model_encoder_mcep.pad_left + model_decoder_mcep.pad_left)*2
            pad_right = (model_encoder_mcep.pad_right + model_decoder_mcep.pad_right)*2
            outpad_lefts = [None]*3
            outpad_rights = [None]*3
            outpad_lefts[0] = pad_left-model_encoder_mcep.pad_left
            outpad_rights[0] = pad_right-model_encoder_mcep.pad_right
            outpad_lefts[1] = outpad_lefts[0]-model_decoder_mcep.pad_left
            outpad_rights[1] = outpad_rights[0]-model_decoder_mcep.pad_right
            outpad_lefts[2] = outpad_lefts[1]-model_encoder_mcep.pad_left
            outpad_rights[2] = outpad_rights[1]-model_encoder_mcep.pad_right
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat_org = read_hdf5(feat_file, "/feat_mceplf0cap")
                logging.info(feat_org.shape)

                with torch.no_grad():
                    feat = F.pad(torch.FloatTensor(feat_org).cuda().unsqueeze(0).transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2)

                    if config.ar_enc:
                        spk_logits, lat_src, _, _ = model_encoder_mcep(feat, yz_in=yz_in)
                        spk_logits_e, lat_src_e, _, _ = model_encoder_excit(feat, yz_in=yz_in)
                    else:
                        spk_logits, lat_src, _ = model_encoder_mcep(feat)
                        spk_logits_e, lat_src_e, _ = model_encoder_excit(feat)
                    idx_vq = nn_search_batch(lat_src, model_vq.weight)
                    lat_src = model_vq(idx_vq)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[0]:-outpad_rights[0]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[0]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    idx_vq_e = nn_search_batch(lat_src_e, model_vq.weight)
                    lat_src_e = model_vq(idx_vq_e)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[0]:-outpad_rights[0]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[0]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq_e")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:], dim=-1), 1))
                    logging.info('input spkpost_e')
                    if outpad_rights[0] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:], dim=-1), 1))

                    src_code = (torch.ones((1, lat_src.shape[1]))*spk_idx).cuda().long()

                    if config.ar_dec:
                        cvmcep_src, _, _ = model_decoder_mcep(src_code, lat_src, x_in=x_in)
                    else:
                        cvmcep_src, _ = model_decoder_mcep(src_code, lat_src)
                    if config.ar_f0:
                        cvlf0_src, _, _ = model_decoder_excit(src_code, lat_src_e, e_in=e_in)
                    else:
                        cvlf0_src, _ = model_decoder_excit(src_code, lat_src_e)

                    cv_feat = torch.cat((cvlf0_src, cvmcep_src), 2)
                    if config.ar_enc:
                        spk_logits, lat_rec, _, _ = model_encoder_mcep(cv_feat, yz_in=yz_in)
                        spk_logits_e, lat_rec_e, _, _ = model_encoder_excit(cv_feat, yz_in=yz_in)
                    else:
                        spk_logits, lat_rec, _ = model_encoder_mcep(cv_feat)
                        spk_logits_e, lat_rec_e, _ = model_encoder_excit(cv_feat)
                    idx_vq = nn_search_batch(lat_rec, model_vq.weight)
                    lat_rec = model_vq(idx_vq)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq[:,outpad_lefts[2]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    idx_vq_e = nn_search_batch(lat_rec_e, model_vq.weight)
                    lat_rec_e = model_vq(idx_vq_e)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), return_counts=True)
                    else:
                        unique, counts = np.unique(idx_vq_e[:,outpad_lefts[2]:].cpu().data.numpy(), return_counts=True)
                    logging.info("input vq_e")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('rec spkpost')
                    if outpad_rights[2] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[2]:-outpad_rights[2]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[2]:], dim=-1), 1))
                    logging.info('rec spkpost_e')
                    if outpad_rights[2] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[2]:-outpad_rights[2]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[2]:], dim=-1), 1))

                    src_code = (torch.ones((1, lat_rec.shape[1]))*spk_idx).cuda().long()

                    if config.ar_dec:
                        cvmcep_cyc, _, _ = model_decoder_mcep(src_code, lat_rec, x_in=x_in)
                    else:
                        cvmcep_cyc, _ = model_decoder_mcep(src_code, lat_rec)
                    if config.ar_f0:
                        cvlf0_cyc, _, _ = model_decoder_excit(src_code, lat_rec_e, e_in=e_in)
                    else:
                        cvlf0_cyc, _ = model_decoder_excit(src_code, lat_rec_e)

                    if outpad_rights[1] > 0:
                        cvmcep_src = cvmcep_src[:,outpad_lefts[1]:-outpad_rights[1]]
                        cvlf0_src = cvlf0_src[:,outpad_lefts[1]:-outpad_rights[1]]
                    else:
                        cvmcep_src = cvmcep_src[:,outpad_lefts[1]:]
                        cvlf0_src = cvlf0_src[:,outpad_lefts[1]:]

                    feat_rec = torch.cat((torch.round(cvlf0_src[:,:,:1]), cvlf0_src[:,:,1:2], \
                                            torch.round(cvlf0_src[:,:,2:3]), cvlf0_src[:,:,3:], cvmcep_src), \
                                                2)[0].cpu().data.numpy()
                    feat_cyc = torch.cat((torch.round(cvlf0_cyc[:,:,:1]), cvlf0_cyc[:,:,1:2], \
                                            torch.round(cvlf0_cyc[:,:,2:3]), cvlf0_cyc[:,:,3:], cvmcep_cyc), \
                                                2)[0].cpu().data.numpy()

                    cvmcep_src = np.array(cvmcep_src[0].cpu().data.numpy(), dtype=np.float64)
                    cvlf0_src = np.array(cvlf0_src[0].cpu().data.numpy(), dtype=np.float64)

                    cvmcep_cyc = np.array(cvmcep_cyc[0].cpu().data.numpy(), dtype=np.float64)
                    cvlf0_cyc = np.array(cvlf0_cyc[0].cpu().data.numpy(), dtype=np.float64)

                logging.info(cvlf0_src.shape)
                logging.info(cvmcep_src.shape)

                logging.info(cvlf0_cyc.shape)
                logging.info(cvmcep_cyc.shape)

                mcep = np.array(feat_org[:,-model_decoder_mcep.out_dim:])
                f0 = np.array(np.rint(feat_org[:,0])*np.exp(feat_org[:,1]))
                codeap = np.array(np.rint(feat_org[:,2:3])*(-np.exp(feat_org[:,3:feat_org.shape[-1]-model_decoder_mcep.out_dim])))
 
                cvf0_src = np.array(np.rint(cvlf0_src[:,0])*np.exp(cvlf0_src[:,1]))
                cvcodeap_src = np.array(np.rint(cvlf0_src[:,2:3])*(-np.exp(cvlf0_src[:,3:])))
                f0_rmse = np.sqrt(np.mean((cvf0_src-f0)**2))
                logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse))
                cvf0_src_mean = np.mean(cvf0_src)
                f0_mean = np.mean(f0)
                f0_corr = np.sum((cvf0_src-cvf0_src_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_src-cvf0_src_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_rec: %lf' % (f0_corr))

                codeap_rmse = np.sqrt(np.mean((cvcodeap_src-codeap)**2, axis=0))
                for i in range(codeap_rmse.shape[-1]):
                    logging.info('codeap-%d_rmse_rec: %lf dB' % (i+1, codeap_rmse[i]))

                cvf0_cyc = np.array(np.rint(cvlf0_cyc[:,0])*np.exp(cvlf0_cyc[:,1]))
                cvcodeap_cyc = np.array(np.rint(cvlf0_cyc[:,2:3])*(-np.exp(cvlf0_cyc[:,3:])))
                f0_rmse_cyc = np.sqrt(np.mean((cvf0_cyc-f0)**2))
                logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc))
                cvf0_cyc_mean = np.mean(cvf0_cyc)
                f0_mean = np.mean(f0)
                f0_corr_cyc = np.sum((cvf0_cyc-cvf0_cyc_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_cyc-cvf0_cyc_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc))

                codeap_rmse_cyc = np.sqrt(np.mean((cvcodeap_cyc-codeap)**2, axis=0))
                for i in range(codeap_rmse_cyc.shape[-1]):
                    logging.info('codeap-%d_rmse_cyc: %lf dB' % (i+1, codeap_rmse_cyc[i]))

                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_rec: %.6f dB +- %.6f" % (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean_cyc = np.mean(mcdpow_arr)
                mcdpow_std_cyc = np.std(mcdpow_arr)
                mcd_mean_cyc = np.mean(mcd_arr)
                mcd_std_cyc = np.std(mcd_arr)
                logging.info("mcdpow_cyc: %.6f dB +- %.6f" % (mcdpow_mean_cyc, mcdpow_std_cyc))
                logging.info("mcd_cyc: %.6f dB +- %.6f" % (mcd_mean_cyc, mcd_std_cyc))
            
                logging.info('org f0')
                logging.info(f0[10:15])
                logging.info('rec f0')
                logging.info(cvf0_src[10:15])
                logging.info('cyc f0')
                logging.info(cvf0_cyc[10:15])
                logging.info('org cap')
                logging.info(codeap[10:15])
                logging.info('rec cap')
                logging.info(cvcodeap_src[10:15])
                logging.info('cyc cap')
                logging.info(cvcodeap_cyc[10:15])

                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    f0rmse_cvlist.append(f0_rmse)
                    f0corr_cvlist.append(f0_corr)
                    caprmse_cvlist.append(codeap_rmse)
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep_src[:,1:], axis=0))
                    logging.info(len(cvlist))
                    f0rmse_cvlist_cyc.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc.append(f0_corr_cyc)
                    caprmse_cvlist_cyc.append(codeap_rmse_cyc)
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc.append(mcd_std_cyc)
                    cvlist_cyc.append(np.var(cvmcep_cyc[:,1:], axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    f0rmse_cvlist_dv.append(f0_rmse)
                    f0corr_cvlist_dv.append(f0_corr)
                    caprmse_cvlist_dv.append(codeap_rmse)
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep_src[:,1:], axis=0))
                    logging.info(len(cvlist_dv))
                    f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc_dv.append(f0_corr_cyc)
                    caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc)
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc_dv.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc_dv.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc_dv.append(mcd_std_cyc)
                    cvlist_cyc_dv.append(np.var(cvmcep_cyc[:,1:], axis=0))

                logging.info('write rec to h5')
                outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk+"-"+args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
Example #15
0
        print('[Data] reading ' + config_folder + config_file)
        config = utils.read_json(config_folder + config_file)
        
        height = config['height']
        width = config['width']
        depth = config['depth']

        space_file = filename('space_file')
        space = utils.read_hdf5(space_file)
        input_file = filename('input_file')
        input = utils.read_hdf5(input_file)

        space[input == 1] = 0
        if len(space.shape) < 5:
            space = np.expand_dims(space, axis=1)

        utils.write_hdf5(space_file, space)
        print('[Data] wrote ' + space_file)

        for key in ['input', 'space', 'output', 'sdf', 'input_sdf']:
            file = filename(key + '_file')
            volumes = utils.read_hdf5(file)

            volumes = np.squeeze(volumes)
            if len(volumes.shape) < 4:
                volumes = np.expand_dims(volumes, axis = 0)

            if len(volumes.shape) < 5:
                utils.write_hdf5(file, np.expand_dims(volumes, axis = 1))
                print('[Data] wrote ' + file)
Example #16
0
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None, \
                    cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None, mcd_cvlist_cyc=None, \
                    mcdstd_cvlist_cyc=None, cvlist_cyc_dv=None, mcdpow_cvlist_cyc_dv=None, mcdpowstd_cvlist_cyc_dv=None, \
                    mcd_cvlist_cyc_dv=None, mcdstd_cvlist_cyc_dv=None):
        """Reconstruct mel-cepstra with a VQ-VAE encoder/decoder on one GPU.

        For every file in `feat_list`: encode the source features, quantize the
        latents against the VQ codebook, decode a reconstruction ("rec"), then
        re-encode/re-decode the reconstruction ("cyc"), log MCD / VQ-usage
        statistics, append the per-utterance metrics to the shared lists
        (train vs. dev, chosen from the dataset directory name), and write the
        rec/cyc features to HDF5.

        Args:
            feat_list: list of HDF5 feature file paths to process.
            gpu: CUDA device index to run on.
            *_cvlist* args: multiprocessing-shared lists that collect
                per-utterance metrics; `_dv` variants collect the dev set,
                `_cyc` variants the cyclic reconstruction.

        NOTE(review): relies on `config`, `args`, `n_spk`, `spk_idx`,
        `model_*` classes and helpers from the enclosing scope.
        """
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim + config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder)
                model_decoder = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=config.bi_dec,
                    pad_first=True,
                    ar=config.ar_dec)
                logging.info(model_decoder)
                # VQ codebook shared by input and reconstruction passes
                model_vq = torch.nn.Embedding(config.ctr_size, config.lat_dim)
                logging.info(model_vq)
                model_encoder.load_state_dict(
                    torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(
                    torch.load(args.model)["model_decoder"])
                model_vq.load_state_dict(torch.load(args.model)["model_vq"])
                model_encoder.cuda()
                model_decoder.cuda()
                model_vq.cuda()
                model_encoder.eval()
                model_decoder.eval()
                model_vq.eval()
                # inference only: freeze all parameters
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                for param in model_vq.parameters():
                    param.requires_grad = False
                if config.ar_enc:
                    # initial autoregressive input for the encoder (spk one-hot + latent)
                    yz_in = torch.zeros((1, 1, n_spk + config.lat_dim)).cuda()
                if config.ar_dec:
                    # initial AR input for the decoder: zeros in normalized domain
                    mean_stats = torch.FloatTensor(
                        read_hdf5(
                            config.stats,
                            "/mean_" + config.string_path.replace("/", "")))
                    scale_stats = torch.FloatTensor(
                        read_hdf5(
                            config.stats,
                            "/scale_" + config.string_path.replace("/", "")))
                    x_in = ((torch.zeros((1, 1, config.mcep_dim)) -
                             mean_stats[config.excit_dim:]) /
                            scale_stats[config.excit_dim:]).cuda()
            count = 0
            # replicate-pad enough context for two encode+decode passes, and
            # record how much padding remains after each stage
            pad_left = (model_encoder.pad_left + model_decoder.pad_left) * 2
            pad_right = (model_encoder.pad_right + model_decoder.pad_right) * 2
            outpad_lefts = [None] * 3
            outpad_rights = [None] * 3
            outpad_lefts[0] = pad_left - model_encoder.pad_left
            outpad_rights[0] = pad_right - model_encoder.pad_right
            outpad_lefts[1] = outpad_lefts[0] - model_decoder.pad_left
            outpad_rights[1] = outpad_rights[0] - model_decoder.pad_right
            outpad_lefts[2] = outpad_lefts[1] - model_encoder.pad_left
            outpad_rights[2] = outpad_rights[1] - model_encoder.pad_right
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat_org = read_hdf5(feat_file, "/feat_mceplf0cap")
                logging.info(feat_org.shape)
                mcep = np.array(feat_org[:, -model_decoder.out_dim:])

                with torch.no_grad():
                    feat = torch.FloatTensor(feat_org).cuda().unsqueeze(0)
                    feat_excit = feat[:, :, :config.excit_dim]

                    # --- first pass: encode source, quantize, decode rec ---
                    if config.ar_enc:
                        spk_logits, lat_src, _, _ = model_encoder(F.pad(feat.transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2), \
                                                            yz_in=yz_in)
                    else:
                        spk_logits, lat_src, _ = model_encoder(
                            F.pad(feat.transpose(1, 2), (pad_left, pad_right),
                                  "replicate").transpose(1, 2))
                    # snap latents to their nearest codebook entries
                    idx_vq = nn_search_batch(lat_src, model_vq.weight)
                    lat_src = model_vq(idx_vq)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[0]:-outpad_rights[0]].cpu(
                            ).data.numpy(),
                            return_counts=True)
                    else:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[0]:].cpu().data.numpy(),
                            return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:
                                                     -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:],
                                          dim=-1), 1))

                    # decode back toward the same (source) speaker
                    src_code = (torch.ones(
                        (1, lat_src.shape[1])) * spk_idx).cuda().long()
                    if config.ar_dec:
                        cvmcep_src, _, _ = model_decoder(src_code,
                                                         lat_src,
                                                         x_in=x_in)
                    else:
                        cvmcep_src, _ = model_decoder(src_code, lat_src)

                    # --- second pass: re-encode the reconstruction (cyclic) ---
                    if config.ar_enc:
                        spk_logits, lat_rec, _, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2), cvmcep_src), 2),
                                                            yz_in=yz_in)
                    else:
                        spk_logits, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2), cvmcep_src), 2))
                    idx_vq = nn_search_batch(lat_rec, model_vq.weight)
                    lat_rec = model_vq(idx_vq)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[2]:-outpad_rights[2]].cpu(
                            ).data.numpy(),
                            return_counts=True)
                    else:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[2]:].cpu().data.numpy(),
                            return_counts=True)
                    logging.info("rec vq")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('rec spkpost')
                    if outpad_rights[2] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[2]:
                                                     -outpad_rights[2]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[2]:],
                                          dim=-1), 1))

                    src_code = (torch.ones(
                        (1, lat_rec.shape[1])) * spk_idx).cuda().long()
                    if config.ar_dec:
                        cvmcep_cyc, _, _ = model_decoder(src_code,
                                                         lat_rec,
                                                         x_in=x_in)
                    else:
                        cvmcep_cyc, _ = model_decoder(src_code, lat_rec)

                    # strip the padding that remains after decode so rec aligns
                    # with the original frame count
                    if outpad_rights[1] > 0:
                        cvmcep_src = cvmcep_src[:, outpad_lefts[1]:
                                                -outpad_rights[1]]
                    else:
                        cvmcep_src = cvmcep_src[:, outpad_lefts[1]:]

                    feat_rec = torch.cat((feat_excit, cvmcep_src),
                                         2)[0].cpu().data.numpy()
                    feat_cyc = torch.cat((feat_excit, cvmcep_cyc),
                                         2)[0].cpu().data.numpy()

                    cvmcep_src = np.array(cvmcep_src[0].cpu().data.numpy(),
                                          dtype=np.float64)
                    cvmcep_cyc = np.array(cvmcep_cyc[0].cpu().data.numpy(),
                                          dtype=np.float64)

                logging.info(cvmcep_src.shape)
                logging.info(cvmcep_cyc.shape)

                # evaluate MCD on speech frames only
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean_cyc = np.mean(mcdpow_arr)
                mcdpow_std_cyc = np.std(mcdpow_arr)
                mcd_mean_cyc = np.mean(mcd_arr)
                mcd_std_cyc = np.std(mcd_arr)
                logging.info("mcdpow_cyc: %.6f dB +- %.6f" %
                             (mcdpow_mean_cyc, mcdpow_std_cyc))
                logging.info("mcd_cyc: %.6f dB +- %.6f" %
                             (mcd_mean_cyc, mcd_std_cyc))

                # accumulate per-utterance stats; train vs dev is taken from
                # the dataset directory name
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    # bug fix: each mean was appended twice (copy-paste),
                    # which double-counted it in the aggregated statistics
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist))
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc.append(mcd_std_cyc)
                    cvlist_cyc.append(np.var(cvmcep_cyc[:, 1:], axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc_dv.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc_dv.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc_dv.append(mcd_std_cyc)
                    cvlist_cyc_dv.append(np.var(cvmcep_cyc[:, 1:], axis=0))

                logging.info('write rec to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
Example #17
0
    def feature_extract(cpu, wav_list, arr, max_frame_list,
                        max_spc_frame_list):
        """Extract acoustic features for this worker's share of wav files.

        For each wav: runs WORLD analysis (F0 / spectrum / aperiodicity),
        mel-magnitude spectrogram extraction, mel-cepstrum conversion, and
        writes the results to per-utterance HDF5 files.  Also resynthesizes
        check waveforms (WORLD vocoder and Griffin-Lim).  In ``args.init``
        mode only F0 and frame power are extracted (used to initialize
        speaker statistics).

        Args:
            cpu: 0-based worker index, used only for log messages.
            wav_list: list of wav file paths assigned to this worker.
            arr: shared counter array; [0] += n_wav, [1] += n_samples,
                [2] += n_frames.
            max_frame_list: shared list; this worker appends its max frame
                count over all utterances.
            max_spc_frame_list: shared list; this worker appends its max
                speech-frame (non-silent) count.
        """
        n_wav = len(wav_list)
        n_sample = 0
        n_frame = 0
        max_frame = 0
        max_spc_frame = 0
        count = 1
        # Pseudo-inverse of the mel filterbank; used below to approximately
        # invert the mel-magnitude spectrogram for Griffin-Lim resynthesis.
        # NOTE(review): librosa >= 0.10 requires keyword arguments here
        # (sr=..., n_fft=...) -- confirm the pinned librosa version.
        melfb_t = np.linalg.pinv(
            librosa.filters.mel(args.fs, args.fftl, n_mels=args.mel_dim))
        for wav_name in wav_list:
            # load wavfile and apply low cut filter
            fs, x = read_wav(wav_name, cutoff=args.highpass_cutoff)
            n_sample += x.shape[0]
            logging.info("cpu-" + str(cpu + 1) + " " + str(len(wav_list)) +
                         " " + wav_name + " " + str(x.shape[0]) + " " +
                         str(n_sample) + " " + str(count))
            logging.info(wav_list)

            # check sampling frequency
            if not fs == args.fs:
                # FIX: was logging.info("ERROR: ...") -- a fatal condition
                # followed by sys.exit(1) should use the error level, matching
                # the convention used elsewhere in this file.
                logging.error("sampling frequency is not matched.")
                sys.exit(1)

            hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
                ".wav", ".h5")

            if not args.init:
                # Full feature extraction path.
                # Use a speaker-specific F0 search range unless both bounds
                # are at their defaults (40/700 Hz, presumably -- confirm).
                if args.minf0 != 40 and args.maxf0 != 700:
                    time_axis_range, f0_range, spc_range, ap_range = analyze_range(x, fs=fs, minf0=args.minf0, \
                                                        maxf0=args.maxf0, fperiod=args.shiftms, fftl=args.fftl)
                else:
                    logging.info('open spk')
                    time_axis_range, f0_range, spc_range, ap_range = analyze(
                        x, fs=fs, fperiod=args.shiftms, fftl=args.fftl)
                write_hdf5(hdf5name, "/f0_range", f0_range)
                write_hdf5(hdf5name, "/time_axis", time_axis_range)

                # Mel-magnitude spectrogram, stored log-compressed.
                melmagsp = melsp(x,
                                 n_mels=args.mel_dim,
                                 n_fft=args.fftl,
                                 shiftms=args.shiftms,
                                 winms=args.winms,
                                 fs=fs)
                logging.info(melmagsp.shape)

                write_hdf5(hdf5name, "/log_1pmelmagsp",
                           np.log(1 + 10000 * melmagsp))

                # Continuous F0 with a separate U/V flag, low-pass filtered
                # at 20 Hz to smooth the interpolated contour.
                uv_range, cont_f0_range = convert_continuos_f0(
                    np.array(f0_range))
                unique, counts = np.unique(uv_range, return_counts=True)
                logging.info(dict(zip(unique, counts)))
                cont_f0_lpf_range = \
                    low_pass_filter(cont_f0_range, int(1.0 / (args.shiftms * 0.001)), cutoff=20)

                # Mel-cepstrum, frame power, and speech-frame indices
                # (frames above the power threshold).
                mcep_range = ps.sp2mc(spc_range, args.mcep_dim,
                                      args.mcep_alpha)
                npow_range = spc2npow(spc_range)
                _, spcidx_range = extfrm(mcep_range,
                                         npow_range,
                                         power_threshold=args.pow)

                codeap_range = pw.code_aperiodicity(ap_range, fs)

                cont_f0_lpf_range = np.expand_dims(cont_f0_lpf_range, axis=-1)
                uv_range = np.expand_dims(uv_range, axis=-1)
                unique, counts = np.unique(uv_range, return_counts=True)
                logging.info(dict(zip(unique, counts)))

                # Feature set 1: [uv, log-F0, coded aperiodicity, mcep].
                feat_orglf0 = np.c_[uv_range,
                                    np.log(cont_f0_lpf_range), codeap_range,
                                    mcep_range]
                logging.info(feat_orglf0.shape)
                write_hdf5(hdf5name, "/feat_org_lf0", feat_orglf0)

                write_hdf5(hdf5name, "/spcidx_range", spcidx_range)

                logging.info(hdf5name)
                # Convert each coded-aperiodicity band to a continuous
                # log-domain contour with its own U/V flag; only the first
                # band's U/V flag is kept (uv_codeap).
                n_codeap = codeap_range.shape[-1]
                for i in range(n_codeap):
                    logging.info('codeap: %d' % (i + 1))
                    uv_codeap_i, cont_codeap_i = convert_continuos_codeap(
                        np.array(codeap_range[:, i]))
                    cont_codeap_i = np.log(
                        -np.clip(cont_codeap_i,
                                 a_min=np.amin(cont_codeap_i),
                                 a_max=MAX_CODEAP))
                    if i > 0:
                        cont_codeap = np.c_[
                            cont_codeap,
                            np.expand_dims(cont_codeap_i, axis=-1)]
                    else:
                        uv_codeap = np.expand_dims(uv_codeap_i, axis=-1)
                        cont_codeap = np.expand_dims(cont_codeap_i, axis=-1)
                    uv_codeap_i = np.expand_dims(uv_codeap_i, axis=-1)
                    unique, counts = np.unique(uv_codeap_i, return_counts=True)
                    logging.info(dict(zip(unique, counts)))
                    logging.info((uv_range == uv_codeap_i).all())
                    logging.info((uv_codeap == uv_codeap_i).all())
                    logging.info(uv_codeap.shape)
                    logging.info(cont_codeap.shape)
                # Feature set 2: [uv, log-F0, uv_codeap, cont_codeap, mcep].
                feat_mceplf0cap = np.c_[uv_range,
                                        np.log(cont_f0_lpf_range), uv_codeap,
                                        cont_codeap, mcep_range]
                logging.info(feat_mceplf0cap.shape)
                write_hdf5(hdf5name, "/feat_mceplf0cap", feat_mceplf0cap)

                n_frame += feat_orglf0.shape[0]
                if max_frame < feat_orglf0.shape[0]:
                    max_frame = feat_orglf0.shape[0]
                if max_spc_frame < spcidx_range[0].shape[0]:
                    max_spc_frame = spcidx_range[0].shape[0]
                # Save the high-pass-filtered waveform if requested.
                if args.highpass_cutoff != 0 and args.wavfiltdir is not None:
                    sf.write(
                        args.wavfiltdir + "/" + os.path.basename(wav_name), x,
                        fs, 'PCM_16')
                # Sanity resynthesis 1: WORLD vocoder from extracted features.
                wavpath = args.wavdir + "/" + os.path.basename(wav_name)
                logging.info("cpu-" + str(cpu + 1) + " " + wavpath)
                sp_rec = ps.mc2sp(mcep_range, args.mcep_alpha, args.fftl)
                wav = np.clip(pw.synthesize(f0_range, sp_rec, ap_range, fs, frame_period=args.shiftms), \
                               -1, 1)
                logging.info(wavpath)
                sf.write(wavpath, wav, fs, 'PCM_16')

                # Sanity resynthesis 2: Griffin-Lim from the (approximately
                # inverted) mel-magnitude spectrogram.
                recmagsp = np.matmul(melfb_t, melmagsp.T)
                hop_length = int((args.fs / 1000) * args.shiftms)
                win_length = int((args.fs / 1000) * args.winms)
                wav = np.clip(
                    librosa.core.griffinlim(recmagsp,
                                            hop_length=hop_length,
                                            win_length=win_length,
                                            window='hann'), -1, 1)
                wavpath = args.wavgfdir + "/" + os.path.basename(wav_name)
                logging.info(wavpath)
                sf.write(wavpath, wav, fs, 'PCM_16')
            else:
                # Init mode: only F0 and frame power, for speaker statistics.
                time_axis, f0, spc, ap = analyze(x,
                                                 fs=fs,
                                                 fperiod=args.shiftms,
                                                 fftl=args.fftl)
                write_hdf5(hdf5name, "/f0", f0)
                npow = spc2npow(spc)
                write_hdf5(hdf5name, "/npow", npow)
                n_frame += f0.shape[0]
                if max_frame < f0.shape[0]:
                    max_frame = f0.shape[0]

            count += 1
        # Publish this worker's totals into the shared accumulators.
        arr[0] += n_wav
        arr[1] += n_sample
        arr[2] += n_frame
        max_frame_list.append(max_frame)
        max_spc_frame_list.append(max_spc_frame)
        if (n_wav > 0):
            logging.info(str(arr[0])+" "+str(n_wav)+" "+str(arr[1])+" "+str(n_sample/n_wav)+" "+\
                    str(arr[2])+" "+str(n_frame/n_wav)+" max_frame = "+str(max_frame)+" max_spc_frame = "+str(max_spc_frame))
Example #18
0
    def decode_RNN(feat_list, gpu, cvlist=None,
            mcd_cvlist_src=None, mcdstd_cvlist_src=None, mcdpow_cvlist_src=None, mcdpowstd_cvlist_src=None,\
            mcd_cvlist_cyc=None, mcdstd_cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None,\
            mcd_cvlist=None, mcdstd_cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, \
            lat_dist_rmse_list=None, lat_dist_cosim_list=None):
        """Convert each utterance in feat_list to the target speaker on one GPU.

        Builds a GRU-VAE encoder, spectral decoder, and post-net from the
        trained checkpoint, then for every feature file: reconstructs the
        source speaker (rec), converts to the target speaker (cv), and
        cycle-converts back to the source (cyc).  Mel-cepstral distortion
        (with and without the power coefficient) and latent-space distances
        are appended to the shared ``*_cvlist`` / ``lat_dist_*`` accumulators,
        converted features are written to HDF5, and check waveforms are
        synthesized with the WORLD vocoder.

        Args:
            feat_list: HDF5 feature files assigned to this worker.
            gpu: CUDA device index to run on.
            cvlist...lat_dist_cosim_list: shared (manager) lists collecting
                per-utterance statistics across workers; all optional.
        """
        with torch.cuda.device(gpu):
            # --- build models, load checkpoint weights, freeze for inference ---
            # define model and load parameters
            with torch.no_grad():
                model_encoder = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim+config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder)
                model_decoder = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_dec)
                logging.info(model_decoder)
                model_post = GRU_POST_NET(
                    spec_dim=config.mcep_dim,
                    excit_dim=2,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_post,
                    hidden_units=config.hidden_units_post,
                    kernel_size=config.kernel_size_post,
                    dilation_size=config.dilation_size_post,
                    causal_conv=config.causal_conv_post,
                    pad_first=True,
                    right_size=config.right_size_post)
                    #excit_dim=config.excit_dim,
                    #excit_dim=None,
                logging.info(model_post)
                model_encoder.load_state_dict(torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(torch.load(args.model)["model_decoder"])
                model_post.load_state_dict(torch.load(args.model)["model_post"])
                model_encoder.remove_weight_norm()
                model_decoder.remove_weight_norm()
                model_post.remove_weight_norm()
                model_encoder.cuda()
                model_decoder.cuda()
                model_post.cuda()
                model_encoder.eval()
                model_decoder.eval()
                model_post.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                for param in model_post.parameters():
                    param.requires_grad = False
            count = 0
            # --- precompute padding bookkeeping ---
            # Inputs are padded once for the full enc->dec->post->enc->dec
            # chain (hence *2); outpad_lefts/rights[i] track how much padding
            # remains after each stage so intermediate outputs can be cropped.
            pad_left = (model_encoder.pad_left + model_decoder.pad_left + model_post.pad_left)*2
            pad_right = (model_encoder.pad_right + model_decoder.pad_right + model_post.pad_right)*2
            outpad_lefts = [None]*5
            outpad_rights = [None]*5
            outpad_lefts[0] = pad_left-model_encoder.pad_left
            outpad_rights[0] = pad_right-model_encoder.pad_right
            outpad_lefts[1] = outpad_lefts[0]-model_decoder.pad_left
            outpad_rights[1] = outpad_rights[0]-model_decoder.pad_right
            outpad_lefts[2] = outpad_lefts[1]-model_post.pad_left
            outpad_rights[2] = outpad_rights[1]-model_post.pad_right
            outpad_lefts[3] = outpad_lefts[2]-model_encoder.pad_left
            outpad_rights[3] = outpad_rights[2]-model_encoder.pad_right
            outpad_lefts[4] = outpad_lefts[3]-model_decoder.pad_left
            outpad_rights[4] = outpad_rights[3]-model_decoder.pad_right
            logging.info(f'{pad_left} {pad_right}')
            logging.info(outpad_lefts)
            logging.info(outpad_rights)
            for feat_file in feat_list:
                # convert mcep
                # Source speaker is taken from the directory name of the file.
                spk_src = os.path.basename(os.path.dirname(feat_file))
                src_idx = spk_list.index(spk_src)
                logging.info('%s --> %s' % (spk_src, args.spk_trg))

                # If a parallel target-speaker utterance exists, load its mcep
                # for DTW-based conversion accuracy evaluation.
                file_trg = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk_trg, os.path.basename(feat_file))
                trg_exist = False
                if os.path.exists(file_trg):
                    logging.info('exist: %s' % (file_trg))
                    feat_trg = read_hdf5(file_trg, config.string_path)
                    mcep_trg = feat_trg[:,-config.mcep_dim:]
                    logging.info(mcep_trg.shape)
                    trg_exist = True

                # Load source features; assumes layout [uv, log-F0, coded
                # aperiodicity ..., mcep] with excitation in the first
                # config.excit_dim columns -- confirm against extraction code.
                feat_org = read_hdf5(feat_file, config.string_path)
                mcep = np.array(feat_org[:,-config.mcep_dim:])
                codeap = np.array(np.rint(feat_org[:,2:3])*(-np.exp(feat_org[:,3:config.excit_dim])))
                sp = np.array(ps.mc2sp(mcep, args.mcep_alpha, args.fftl))
                ap = pw.decode_aperiodicity(codeap, args.fs, args.fftl)
                # Linear-domain F0 conversion src -> trg, then rebuild the
                # excitation block with the converted log-F0.
                feat_cvf0_lin = np.expand_dims(convert_f0(np.exp(feat_org[:,1]), src_f0_mean, src_f0_std, trg_f0_mean, trg_f0_std), axis=-1)
                feat_cv = np.c_[feat_org[:,:1], np.log(feat_cvf0_lin), feat_org[:,2:config.excit_dim]]

                logging.info("generate")
                with torch.no_grad():
                    feat = F.pad(torch.FloatTensor(feat_org).cuda().unsqueeze(0).transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2)
                    feat_excit = torch.FloatTensor(feat_org[:,:config.excit_dim]).cuda().unsqueeze(0)
                    feat_excit_cv = torch.FloatTensor(feat_cv).cuda().unsqueeze(0)

                    # --- encode source; log mean speaker posterior ---
                    spk_logits, _, lat_src, _ = model_encoder(feat, sampling=False)
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:], dim=-1), 1))

                    if trg_exist:
                        spk_trg_logits, _, lat_trg, _ = model_encoder(F.pad(torch.FloatTensor(feat_trg).cuda().unsqueeze(0).transpose(1,2), \
                                                            (model_encoder.pad_left,model_encoder.pad_right), "replicate").transpose(1,2), sampling=False)
                        logging.info('target spkpost')
                        logging.info(torch.mean(F.softmax(spk_trg_logits, dim=-1), 1))

                    # --- reconstruction path: decode latent with the SOURCE
                    # speaker code, refine with post-net, re-encode (cycle 1) ---
                    cvmcep_src, _ = model_decoder((torch.ones((1, lat_src.shape[1]))*src_idx).cuda().long(), lat_src)
                    cvmcep_src_post, _ = model_post(cvmcep_src, y=(torch.ones((1, cvmcep_src.shape[1]))*src_idx).cuda().long(),
                                        e=F.pad(feat_excit[:,:,:2].transpose(1,2), (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2))
                                        #e=F.pad(feat_excit.transpose(1,2), (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2))
                    if model_post.pad_right > 0:
                        spk_logits, _, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep_src[:,model_post.pad_left:-model_post.pad_right]), 2), 
                                                            sampling=False)
                    else:
                        spk_logits, _, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep_src[:,model_post.pad_left:]), 2), 
                                                            sampling=False)
                    logging.info('rec spkpost')
                    if outpad_rights[3] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[3]:-outpad_rights[3]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[3]:], dim=-1), 1))

                    # --- conversion path: decode latent with the TARGET
                    # speaker code, refine, re-encode the converted features ---
                    cvmcep, _ = model_decoder((torch.ones((1, lat_src.shape[1]))*trg_idx).cuda().long(), lat_src)
                    cvmcep_post, _ = model_post(cvmcep, y=(torch.ones((1, cvmcep.shape[1]))*trg_idx).cuda().long(),
                                        e=F.pad(feat_excit_cv[:,:,:2].transpose(1,2), (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2))
                                        #e=F.pad(feat_excit_cv.transpose(1,2), (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2))
                    if model_post.pad_right > 0:
                        spk_logits, _, lat_cv, _ = model_encoder(torch.cat((F.pad(feat_excit_cv.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep[:,model_post.pad_left:-model_post.pad_right]), 2), 
                                                            sampling=False)
                    else:
                        spk_logits, _, lat_cv, _ = model_encoder(torch.cat((F.pad(feat_excit_cv.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep[:,model_post.pad_left:]), 2), 
                                                            sampling=False)
                    logging.info('cv spkpost')
                    if outpad_rights[3] > 0:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[3]:-outpad_rights[3]], dim=-1), 1))
                    else:
                        logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[3]:], dim=-1), 1))

                    # --- cyclic path: decode converted latent back with the
                    # SOURCE speaker code ---
                    cvmcep_cyc, _ = model_decoder((torch.ones((1, lat_cv.shape[1]))*src_idx).cuda().long(), lat_cv)
                    cvmcep_cyc_post, _ = model_post(cvmcep_cyc, y=(torch.ones((1, cvmcep_cyc.shape[1]))*src_idx).cuda().long(),
                                        e=F.pad(feat_excit[:,:,:2].transpose(1,2), (outpad_lefts[4],outpad_rights[4]), "replicate").transpose(1,2))
                                        #e=F.pad(feat_excit.transpose(1,2), (outpad_lefts[4],outpad_rights[4]), "replicate").transpose(1,2))

                    # --- crop residual padding and move results to numpy ---
                    if outpad_rights[2] > 0:
                        cvmcep_src = np.array(cvmcep_src_post[0,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), dtype=np.float64)
                        cvmcep = np.array(cvmcep_post[0,outpad_lefts[2]:-outpad_rights[2]].cpu().data.numpy(), dtype=np.float64)
                    else:
                        cvmcep_src = np.array(cvmcep_src_post[0,outpad_lefts[2]:].cpu().data.numpy(), dtype=np.float64)
                        cvmcep = np.array(cvmcep_post[0,outpad_lefts[2]:].cpu().data.numpy(), dtype=np.float64)
                    cvmcep_cyc = np.array(cvmcep_cyc_post[0].cpu().data.numpy(), dtype=np.float64)

                    if trg_exist:
                        if outpad_rights[0] > 0:
                            lat_src = lat_src[:,outpad_lefts[0]:-outpad_rights[0]]
                        else:
                            lat_src = lat_src[:,outpad_lefts[0]:]

                logging.info(cvmcep_src.shape)
                logging.info(cvmcep.shape)
                logging.info(cvmcep_cyc.shape)

                if trg_exist:
                    logging.info(lat_src.shape)
                    logging.info(lat_trg.shape)

                # Global variance of the converted mcep (power excluded).
                cvlist.append(np.var(cvmcep[:,1:], axis=0))

                # --- converted F0 contour and plain converted feature vector ---
                logging.info("cvf0lin")
                f0_range = read_hdf5(feat_file, "/f0_range")
                cvf0_range_lin = convert_f0(f0_range, src_f0_mean, src_f0_std, trg_f0_mean, trg_f0_std)
                uv_range_lin, cont_f0_range_lin = convert_continuos_f0(np.array(cvf0_range_lin))
                unique, counts = np.unique(uv_range_lin, return_counts=True)
                logging.info(dict(zip(unique, counts)))
                cont_f0_lpf_range_lin = \
                    low_pass_filter(cont_f0_range_lin, int(1.0 / (args.shiftms * 0.001)), cutoff=20)
                uv_range_lin = np.expand_dims(uv_range_lin, axis=-1)
                cont_f0_lpf_range_lin = np.expand_dims(cont_f0_lpf_range_lin, axis=-1)
                # plain converted feat for neural vocoder
                feat_cv = np.c_[uv_range_lin, np.log(cont_f0_lpf_range_lin), feat_cv[:,2:config.excit_dim], cvmcep]
                logging.info(feat_cv.shape)

                # --- MCD accounting on speech frames only ---
                logging.info("mcd acc")
                spcidx = np.array(read_hdf5(feat_file, "/spcidx_range")[0])
                # Reconstruction accuracy (frame-aligned, no DTW needed).
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[spcidx], dtype=np.float64), np.array(cvmcep_src[spcidx], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[spcidx,1:], dtype=np.float64), np.array(cvmcep_src[spcidx,1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_src_cv: %.6f dB +- %.6f" % (mcdpow_mean, mcdpow_std))
                logging.info("mcd_src_cv: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                mcdpow_cvlist_src.append(mcdpow_mean)
                mcdpowstd_cvlist_src.append(mcdpow_std)
                mcd_cvlist_src.append(mcd_mean)
                mcdstd_cvlist_src.append(mcd_std)
                if trg_exist:
                    # Conversion accuracy vs. the parallel target utterance
                    # (DTW-aligned), plus latent-space RMSE/cosine distances.
                    spcidx_trg = np.array(read_hdf5(file_trg, "/spcidx_range")[0])
                    _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep[spcidx], \
                                                dtype=np.float64), np.array(mcep_trg[spcidx_trg], dtype=np.float64))
                    _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep[spcidx,1:], \
                                                dtype=np.float64), np.array(mcep_trg[spcidx_trg,1:], dtype=np.float64))
                    mcdpow_mean = np.mean(mcdpow_arr)
                    mcdpow_std = np.std(mcdpow_arr)
                    mcd_mean = np.mean(mcd_arr)
                    mcd_std = np.std(mcd_arr)
                    logging.info("mcdpow_trg: %.6f dB +- %.6f" % (mcdpow_mean, mcdpow_std))
                    logging.info("mcd_trg: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    spcidx_src = torch.LongTensor(spcidx).cuda()
                    spcidx_trg = torch.LongTensor(spcidx_trg).cuda()
                    trj_lat_src = np.array(torch.index_select(lat_src[0],0,spcidx_src).cpu().data.numpy(), dtype=np.float64)
                    trj_lat_trg = np.array(torch.index_select(lat_trg[0],0,spcidx_trg).cpu().data.numpy(), dtype=np.float64)
                    aligned_lat_srctrg, _, _, _ = dtw.dtw_org_to_trg(trj_lat_src, trj_lat_trg)
                    lat_dist_srctrg = np.mean(np.sqrt(np.mean((aligned_lat_srctrg-trj_lat_trg)**2, axis=0)))
                    _, _, lat_cdist_srctrg, _ = dtw.dtw_org_to_trg(trj_lat_trg, trj_lat_src, mcd=0)
                    aligned_lat_trgsrc, _, _, _ = dtw.dtw_org_to_trg(trj_lat_trg, trj_lat_src)
                    lat_dist_trgsrc = np.mean(np.sqrt(np.mean((aligned_lat_trgsrc-trj_lat_src)**2, axis=0)))
                    _, _, lat_cdist_trgsrc, _ = dtw.dtw_org_to_trg(trj_lat_src, trj_lat_trg, mcd=0)
                    logging.info("%lf %lf %lf %lf" % (lat_dist_srctrg, lat_cdist_srctrg, lat_dist_trgsrc, lat_cdist_trgsrc))
                    lat_dist_rmse = (lat_dist_srctrg+lat_dist_trgsrc)/2
                    lat_dist_cosim = (lat_cdist_srctrg+lat_cdist_trgsrc)/2
                    lat_dist_rmse_list.append(lat_dist_rmse)
                    lat_dist_cosim_list.append(lat_dist_cosim)
                    logging.info("lat_dist: %.6f %.6f" % (lat_dist_rmse, lat_dist_cosim))
                # Cyclic reconstruction accuracy.
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[spcidx], dtype=np.float64), np.array(cvmcep_cyc[spcidx], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[spcidx,1:], dtype=np.float64), np.array(cvmcep_cyc[spcidx,1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_cyc_cv: %.6f dB +- %.6f" % (mcdpow_mean, mcdpow_std))
                logging.info("mcd_cyc_cv: %.6f dB +- %.6f" % (mcd_mean, mcd_std))
                mcdpow_cvlist_cyc.append(mcdpow_mean)
                mcdpowstd_cvlist_cyc.append(mcdpow_std)
                mcd_cvlist_cyc.append(mcd_mean)
                mcdstd_cvlist_cyc.append(mcd_std)

                # --- check-waveform synthesis with the WORLD vocoder ---
                logging.info("synth anasyn")
                wav = np.clip(pw.synthesize(f0_range, sp, ap, args.fs, frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(args.outdir,os.path.basename(feat_file).replace(".h5","_anasyn.wav"))
                sf.write(wavpath, wav, args.fs, 'PCM_16')
                logging.info(wavpath)

                logging.info("synth voco rec")
                cvsp_src = ps.mc2sp(cvmcep_src, args.mcep_alpha, args.fftl)
                logging.info(cvsp_src.shape)
                wav = np.clip(pw.synthesize(f0_range, cvsp_src, ap, args.fs, frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(args.outdir, os.path.basename(feat_file).replace(".h5", "_rec.wav"))
                sf.write(wavpath, wav, args.fs, 'PCM_16')
                logging.info(wavpath)

                logging.info("synth voco cv")
                cvsp = ps.mc2sp(cvmcep, args.mcep_alpha, args.fftl)
                logging.info(cvsp.shape)
                wav = np.clip(pw.synthesize(cvf0_range_lin, cvsp, ap, args.fs, frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(args.outdir, os.path.basename(feat_file).replace(".h5", "_cv.wav"))
                sf.write(wavpath, wav, args.fs, 'PCM_16')
                logging.info(wavpath)

                # GV postfilter: blend per-utterance variance toward the
                # target speaker's global variance (weight args.gv_coeff),
                # then restore frame power with mod_pow.
                logging.info("synth voco cv GV")
                datamean = np.mean(cvmcep[:,1:], axis=0)
                cvmcep_gv =  np.c_[cvmcep[:,0], args.gv_coeff*(np.sqrt(gv_mean_trg/cvgv_mean) * \
                                    (cvmcep[:,1:]-datamean) + datamean) + (1-args.gv_coeff)*cvmcep[:,1:]]
                cvmcep_gv = mod_pow(cvmcep_gv, cvmcep, alpha=args.mcep_alpha, irlen=IRLEN)
                cvsp_gv = ps.mc2sp(cvmcep_gv, args.mcep_alpha, args.fftl)
                logging.info(cvsp_gv.shape)
                wav = np.clip(pw.synthesize(cvf0_range_lin, cvsp_gv, ap, args.fs, frame_period=args.shiftms), -1, 1)
                wavpath = os.path.join(args.outdir, os.path.basename(feat_file).replace(".h5", "_cvGV.wav"))
                sf.write(wavpath, wav, args.fs, 'PCM_16')
                logging.info(wavpath)

                #logging.info("synth diffGV")
                #shiftl = int(args.fs/1000*args.shiftms)
                #mc_cv_diff = cvmcep_gv-mcep
                #b = np.apply_along_axis(ps.mc2b, 1, mc_cv_diff, args.mcep_alpha)
                #logging.info(b.shape)
                #assert np.isfinite(b).all
                #mlsa_fil = ps.synthesis.Synthesizer(MLSADF(mcep_dim, alpha=args.mcep_alpha), shiftl)
                #x, fs_ = sf.read(os.path.join(os.path.dirname(feat_file).replace("hdf5", "wav_filtered"), os.path.basename(feat_file).replace(".h5", ".wav")))
                #assert(fs_ == args.fs)
                #wav = mlsa_fil.synthesis(x, b)
                #wav = np.clip(wav, -1, 1)
                #wavpath = os.path.join(args.outdir, os.path.basename(feat_file).replace(".h5", "_DiffGV.wav"))
                #sf.write(wavpath, wav, args.fs, 'PCM_16')
                #logging.info(wavpath)

                #logging.info("synth diffGVF0")
                #time_axis = read_hdf5(feat_file, "/time_axis")
                #sp_diff = pw.cheaptrick(wav, f0_range, time_axis, args.fs, fft_size=args.fftl)
                #logging.info(sp_diff.shape)
                #ap_diff = pw.d4c(wav, f0_range, time_axis, args.fs, fft_size=args.fftl)
                #logging.info(ap_diff.shape)
                #wav = pw.synthesize(cvf0_range_lin, sp_diff, ap_diff, args.fs, frame_period=args.shiftms)
                #wav = np.clip(wav, -1, 1)
                #wavpath = os.path.join(args.outdir,os.path.basename(feat_file).replace(".h5", "_DiffGVF0.wav"))
                #sf.write(wavpath, wav, args.fs, 'PCM_16')
                #logging.info(wavpath)

                #logging.info("analysis diffGVF0")
                #sp_diff_anasyn = pw.cheaptrick(wav, cvf0_range_lin, time_axis, args.fs, fft_size=args.fftl)
                #logging.info(sp_diff_anasyn.shape)
                #mc_cv_diff_anasyn = ps.sp2mc(sp_diff_anasyn, mcep_dim, args.mcep_alpha)
                #ap_diff_anasyn = pw.d4c(wav, cvf0_range_lin, time_axis, args.fs, fft_size=args.fftl)
                #code_ap_diff_anasyn = pw.code_aperiodicity(ap_diff_anasyn, args.fs)
                ## convert to continouos codeap with uv
                #for i in range(code_ap_diff_anasyn.shape[-1]):
                #    logging.info('codeap: %d' % (i+1))
                #    uv_codeap_i, cont_codeap_i = convert_continuos_codeap(np.array(code_ap_diff_anasyn[:,i]))
                #    cont_codeap_i = np.log(-np.clip(cont_codeap_i, a_min=np.amin(cont_codeap_i), a_max=MAX_CODEAP))
                #    if i > 0:
                #        cont_codeap = np.c_[cont_codeap, np.expand_dims(cont_codeap_i, axis=-1)]
                #    else:
                #        uv_codeap = np.expand_dims(uv_codeap_i, axis=-1)
                #        cont_codeap = np.expand_dims(cont_codeap_i, axis=-1)
                #    uv_codeap_i = np.expand_dims(uv_codeap_i, axis=-1)
                #    unique, counts = np.unique(uv_codeap_i, return_counts=True)
                #    logging.info(dict(zip(unique, counts)))
                ## postprocessed converted feat for neural vocoder
                #feat_diffgv_anasyn = np.c_[feat_cv[:,:2], uv_codeap, cont_codeap, mc_cv_diff_anasyn]

                #logging.info("write lat")
                #outTxtDir = os.path.join(args.outdir, os.path.basename(os.path.dirname(feat_file)))
                #if not os.path.exists(outTxtDir):
                #    os.mkdir(outTxtDir)
                #outTxt = os.path.join(outTxtDir, os.path.basename(feat_file).replace(".wav", ".txt"))
                #logging.info(outTxt)
                #g = open(outTxt, "wt")
                #idx_frm = 0 
                #nfrm = trj_lat_src.shape[0]
                #dim = trj_lat_src.shape[1]
                #if not args.time_flag:
                ##if True:
                #    while idx_frm < nfrm:
                #        idx_elmt = 1 
                #        for elmt in trj_lat_src[idx_frm]:
                #            if idx_elmt < dim:
                #                g.write("%lf " % (elmt))
                #            else:
                #                g.write("%lf\n" % (elmt))
                #            idx_elmt += 1
                #        idx_frm += 1
                #else:
                #    while idx_frm < nfrm:
                #        idx_elmt = 1 
                #        for elmt in trj_lat_src[idx_frm]:
                #            if idx_elmt < dim:
                #                if idx_elmt > 1:
                #                    g.write("%lf " % (elmt))
                #                else:
                #                    g.write("%lf %lf " % (time_axis[idx_frm], elmt))
                #            else:
                #                g.write("%lf\n" % (elmt))
                #            idx_elmt += 1
                #        idx_frm += 1
                #g.close()

                # --- write converted features to HDF5 under "src-trg" dir ---
                logging.info('write to h5')
                outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), spk_src+"-"+args.spk_trg)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                # cv
                write_path = args.string_path
                logging.info(feat_file + ' ' + write_path)
                logging.info(feat_cv.shape)
                write_hdf5(feat_file, write_path, feat_cv)
                ## diffGVF0
                #write_path = args.string_path+"_diffgvf0"
                #logging.info(feat_file + ' ' + write_path)
                #logging.info(feat_diffgv_anasyn.shape)
                #write_hdf5(feat_file, write_path, feat_diffgv_anasyn)

                count += 1
Example #19
0
def main():
    """Reconstruct mel-spectrograms of a speaker with a trained cyclic VAE.

    For every input feature file the script encodes the source mel-spectrogram
    and excitation, decodes them back with the same speaker code
    (reconstruction), re-encodes/re-decodes once more (cyclic reconstruction),
    writes both results to HDF5, and accumulates F0 / code-aperiodicity /
    log-spectral-distortion statistics separately for training ('tr') and
    development ('dv') utterances.  Decoding is fanned out over ``n_gpus``
    processes via ``multiprocessing``.
    """
    parser = argparse.ArgumentParser()
    # decode setting
    parser.add_argument("--feats",
                        required=True,
                        type=str,
                        help="list or directory of source eval feat files")
    parser.add_argument("--spk",
                        required=True,
                        type=str,
                        help="speaker name to be reconstructed")
    parser.add_argument("--model", required=True, type=str, help="model file")
    parser.add_argument("--config",
                        required=True,
                        type=str,
                        help="configure file")
    parser.add_argument("--n_gpus", default=1, type=int, help="number of gpus")
    parser.add_argument("--outdir",
                        required=True,
                        type=str,
                        help="directory to save log")
    parser.add_argument("--string_path",
                        required=True,
                        type=str,
                        help="path of h5 generated feature")
    # other setting
    parser.add_argument("--GPU_device",
                        default=None,
                        type=int,
                        help="selection of GPU device")
    parser.add_argument("--GPU_device_str",
                        default=None,
                        type=str,
                        help="selection of GPU device")
    parser.add_argument("--verbose", default=1, type=int, help="log level")

    args = parser.parse_args()

    if args.GPU_device is not None or args.GPU_device_str is not None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        if args.GPU_device_str is None:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(args.GPU_device)
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU_device_str

    # check directory existence
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # set log level
    # NOTE: the order matters — test "== 1" before "> 1"; with the previous
    # "> 0" / "> 1" ordering the DEBUG branch was unreachable.
    if args.verbose == 1:
        logging.basicConfig(
            level=logging.INFO,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
    elif args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
    else:
        logging.basicConfig(
            level=logging.WARN,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
        logging.warning("logging is disabled.")

    # load config (stored as a torch checkpoint)
    config = torch.load(args.config)

    # get source feat list
    if os.path.isdir(args.feats):
        feat_list = sorted(find_files(args.feats, "*.h5"))
    elif os.path.isfile(args.feats):
        feat_list = read_txt(args.feats)
    else:
        logging.error("--feats should be directory or list.")
        sys.exit(1)

    # prepare the file list for parallel decoding
    feat_lists = np.array_split(feat_list, args.n_gpus)
    feat_lists = [f_list.tolist() for f_list in feat_lists]
    for i in range(args.n_gpus):
        logging.info('%d: %d' % (i + 1, len(feat_lists[i])))

    spk_list = config.spk_list.split('@')
    n_spk = len(spk_list)
    spk_idx = spk_list.index(args.spk)

    stats_list = config.stats_list.split('@')
    assert (n_spk == len(stats_list))

    spk_stat = stats_list[spk_idx]
    gv_mean = read_hdf5(spk_stat, "/gv_melsp_mean")

    model_epoch = os.path.basename(args.model).split('.')[0].split('-')[1]
    logging.info('epoch: ' + model_epoch)

    model_name = os.path.basename(os.path.dirname(args.model)).split('_')[1]
    logging.info('mdl_name: ' + model_name)

    logging.info(config)

    # define gpu decode function
    def gpu_decode(feat_list,
                   gpu,
                   cvlist=None,
                   lsd_cvlist=None,
                   lsdstd_cvlist=None,
                   cvlist_dv=None,
                   lsd_cvlist_dv=None,
                   lsdstd_cvlist_dv=None,
                   f0rmse_cvlist=None,
                   f0corr_cvlist=None,
                   caprmse_cvlist=None,
                   f0rmse_cvlist_dv=None,
                   f0corr_cvlist_dv=None,
                   caprmse_cvlist_dv=None,
                   cvlist_cyc=None,
                   lsd_cvlist_cyc=None,
                   lsdstd_cvlist_cyc=None,
                   cvlist_cyc_dv=None,
                   lsd_cvlist_cyc_dv=None,
                   lsdstd_cvlist_cyc_dv=None,
                   f0rmse_cvlist_cyc=None,
                   f0corr_cvlist_cyc=None,
                   caprmse_cvlist_cyc=None,
                   f0rmse_cvlist_cyc_dv=None,
                   f0corr_cvlist_cyc_dv=None,
                   caprmse_cvlist_cyc_dv=None):
        """Decode ``feat_list`` on ``gpu``; append metrics to the shared lists."""
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder_melsp = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_melsp)
                model_decoder_melsp = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim + config.lat_dim_e,
                    excit_dim=config.excit_dim,
                    out_dim=config.mel_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_dec)
                logging.info(model_decoder_melsp)
                model_encoder_excit = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim_e,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_excit)
                model_decoder_excit = GRU_EXCIT_DECODER(
                    feat_dim=config.lat_dim_e,
                    cap_dim=config.cap_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_lf0,
                    hidden_units=config.hidden_units_lf0,
                    kernel_size=config.kernel_size_lf0,
                    dilation_size=config.dilation_size_lf0,
                    causal_conv=config.causal_conv_lf0,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_lf0)
                logging.info(model_decoder_excit)
                if (config.spkidtr_dim > 0):
                    model_spkidtr = SPKID_TRANSFORM_LAYER(
                        n_spk=n_spk, spkidtr_dim=config.spkidtr_dim)
                    logging.info(model_spkidtr)
                # load the checkpoint once instead of once per sub-model
                checkpoint = torch.load(args.model)
                model_encoder_melsp.load_state_dict(
                    checkpoint["model_encoder_melsp"])
                model_decoder_melsp.load_state_dict(
                    checkpoint["model_decoder_melsp"])
                model_encoder_excit.load_state_dict(
                    checkpoint["model_encoder_excit"])
                model_decoder_excit.load_state_dict(
                    checkpoint["model_decoder_excit"])
                if (config.spkidtr_dim > 0):
                    model_spkidtr.load_state_dict(
                        checkpoint["model_spkidtr"])
                model_encoder_melsp.cuda()
                model_decoder_melsp.cuda()
                model_encoder_excit.cuda()
                model_decoder_excit.cuda()
                if (config.spkidtr_dim > 0):
                    model_spkidtr.cuda()
                model_encoder_melsp.eval()
                model_decoder_melsp.eval()
                model_encoder_excit.eval()
                model_decoder_excit.eval()
                if (config.spkidtr_dim > 0):
                    model_spkidtr.eval()
                for param in model_encoder_melsp.parameters():
                    param.requires_grad = False
                for param in model_decoder_melsp.parameters():
                    param.requires_grad = False
                for param in model_encoder_excit.parameters():
                    param.requires_grad = False
                for param in model_decoder_excit.parameters():
                    param.requires_grad = False
                if (config.spkidtr_dim > 0):
                    for param in model_spkidtr.parameters():
                        param.requires_grad = False
            count = 0
            # total receptive-field padding of the full enc->dec->enc->dec
            # chain; the *2 accounts for the two passes (rec + cyc)
            pad_left = (model_encoder_melsp.pad_left +
                        model_decoder_excit.pad_left +
                        model_decoder_melsp.pad_left) * 2
            pad_right = (model_encoder_melsp.pad_right +
                         model_decoder_excit.pad_right +
                         model_decoder_melsp.pad_right) * 2
            # residual padding remaining after each of the 5 network stages
            outpad_lefts = [None] * 5
            outpad_rights = [None] * 5
            outpad_lefts[0] = pad_left - model_encoder_melsp.pad_left
            outpad_rights[0] = pad_right - model_encoder_melsp.pad_right
            outpad_lefts[1] = outpad_lefts[0] - model_decoder_excit.pad_left
            outpad_rights[1] = outpad_rights[0] - model_decoder_excit.pad_right
            outpad_lefts[2] = outpad_lefts[1] - model_decoder_melsp.pad_left
            outpad_rights[2] = outpad_rights[1] - model_decoder_melsp.pad_right
            outpad_lefts[3] = outpad_lefts[2] - model_encoder_melsp.pad_left
            outpad_rights[3] = outpad_rights[2] - model_encoder_melsp.pad_right
            outpad_lefts[4] = outpad_lefts[3] - model_decoder_excit.pad_left
            outpad_rights[4] = outpad_rights[3] - model_decoder_excit.pad_right
            for feat_file in feat_list:
                # reconst. melsp
                logging.info("recmelsp " + feat_file)

                feat_org = read_hdf5(feat_file, "/log_1pmelmagsp")
                logging.info(feat_org.shape)

                with torch.no_grad():
                    # replicate-pad the input along time so every stage sees
                    # enough context
                    feat = F.pad(
                        torch.FloatTensor(feat_org).cuda().unsqueeze(
                            0).transpose(1, 2), (pad_left, pad_right),
                        "replicate").transpose(1, 2)

                    spk_logits, _, lat_src, _ = model_encoder_melsp(
                        feat, sampling=False)
                    spk_logits_e, _, lat_src_e, _ = model_encoder_excit(
                        feat, sampling=False)
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:
                                                     -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:],
                                          dim=-1), 1))
                    logging.info('input spkpost_e')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[0]:
                                                       -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[0]:],
                                          dim=-1), 1))

                    # decode excitation with the source speaker's own code
                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_src_e.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_src_e.shape[1])) * spk_idx).cuda().long()
                    cvlf0_src, _ = model_decoder_excit(src_code, lat_src_e)

                    if model_decoder_excit.pad_right > 0:
                        lat_cat = torch.cat((
                            lat_src_e[:, model_decoder_excit.
                                      pad_left:-model_decoder_excit.pad_right],
                            lat_src[:, model_decoder_excit.
                                    pad_left:-model_decoder_excit.pad_right]),
                                            2)
                    else:
                        lat_cat = torch.cat(
                            (lat_src_e[:, model_decoder_excit.pad_left:],
                             lat_src[:, model_decoder_excit.pad_left:]), 2)
                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long()
                    cvmelsp_src, _ = model_decoder_melsp(
                        lat_cat,
                        y=src_code,
                        e=cvlf0_src[:, :, :config.excit_dim])

                    # second (cyclic) pass: re-encode the reconstruction
                    spk_logits, _, lat_rec, _ = model_encoder_melsp(
                        cvmelsp_src, sampling=False)
                    spk_logits_e, _, lat_rec_e, _ = model_encoder_excit(
                        cvmelsp_src, sampling=False)
                    logging.info('rec spkpost')
                    if outpad_rights[3] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:
                                                     -outpad_rights[3]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:],
                                          dim=-1), 1))
                    logging.info('rec spkpost_e')
                    if outpad_rights[3] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[3]:
                                                       -outpad_rights[3]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[3]:],
                                          dim=-1), 1))

                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_rec_e.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_rec_e.shape[1])) * spk_idx).cuda().long()
                    cvlf0_cyc, _ = model_decoder_excit(src_code, lat_rec_e)

                    if model_decoder_excit.pad_right > 0:
                        lat_cat = torch.cat((
                            lat_rec_e[:, model_decoder_excit.
                                      pad_left:-model_decoder_excit.pad_right],
                            lat_rec[:, model_decoder_excit.
                                    pad_left:-model_decoder_excit.pad_right]),
                                            2)
                    else:
                        lat_cat = torch.cat(
                            (lat_rec_e[:, model_decoder_excit.pad_left:],
                             lat_rec[:, model_decoder_excit.pad_left:]), 2)
                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long()
                    cvmelsp_cyc, _ = model_decoder_melsp(
                        lat_cat,
                        y=src_code,
                        e=cvlf0_cyc[:, :, :config.excit_dim])

                    # trim the residual padding from each output
                    if outpad_rights[1] > 0:
                        cvlf0_src = cvlf0_src[:, outpad_lefts[1]:
                                              -outpad_rights[1]]
                    else:
                        cvlf0_src = cvlf0_src[:, outpad_lefts[1]:]
                    if outpad_rights[2] > 0:
                        cvmelsp_src = cvmelsp_src[:, outpad_lefts[2]:
                                                  -outpad_rights[2]]
                    else:
                        cvmelsp_src = cvmelsp_src[:, outpad_lefts[2]:]
                    if outpad_rights[4] > 0:
                        cvlf0_cyc = cvlf0_cyc[:, outpad_lefts[4]:
                                              -outpad_rights[4]]
                    else:
                        cvlf0_cyc = cvlf0_cyc[:, outpad_lefts[4]:]

                    feat_rec = cvmelsp_src[0].cpu().data.numpy()
                    feat_cyc = cvmelsp_cyc[0].cpu().data.numpy()

                    cvmelsp_src = np.array(cvmelsp_src[0].cpu().data.numpy(),
                                           dtype=np.float64)
                    cvlf0_src = np.array(cvlf0_src[0].cpu().data.numpy(),
                                         dtype=np.float64)

                    cvmelsp_cyc = np.array(cvmelsp_cyc[0].cpu().data.numpy(),
                                           dtype=np.float64)
                    cvlf0_cyc = np.array(cvlf0_cyc[0].cpu().data.numpy(),
                                         dtype=np.float64)

                logging.info(cvlf0_src.shape)
                logging.info(cvmelsp_src.shape)

                logging.info(cvlf0_cyc.shape)
                logging.info(cvmelsp_cyc.shape)

                melsp = np.array(feat_org)

                # WORLD features: col 0 = U/V flag, col 1 = log-F0,
                # col 2 = cap U/V flag, cols 3.. = log code-aperiodicity
                feat_world = read_hdf5(feat_file, "/feat_mceplf0cap")
                f0 = np.array(
                    np.rint(feat_world[:, 0]) * np.exp(feat_world[:, 1]))
                codeap = np.array(
                    np.rint(feat_world[:, 2:3]) *
                    (-np.exp(feat_world[:, 3:config.full_excit_dim])))

                cvf0_src = np.array(
                    np.rint(cvlf0_src[:, 0]) * np.exp(cvlf0_src[:, 1]))
                cvcodeap_src = np.array(
                    np.rint(cvlf0_src[:, 2:3]) * (-np.exp(cvlf0_src[:, 3:])))
                f0_rmse = np.sqrt(np.mean((cvf0_src - f0)**2))
                logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse))
                cvf0_src_mean = np.mean(cvf0_src)
                f0_mean = np.mean(f0)
                f0_corr = np.sum((cvf0_src-cvf0_src_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_src-cvf0_src_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_rec: %lf' % (f0_corr))

                codeap_rmse = np.sqrt(
                    np.mean((cvcodeap_src - codeap)**2, axis=0))
                for i in range(codeap_rmse.shape[-1]):
                    logging.info('codeap-%d_rmse_rec: %lf dB' %
                                 (i + 1, codeap_rmse[i]))

                cvf0_cyc = np.array(
                    np.rint(cvlf0_cyc[:, 0]) * np.exp(cvlf0_cyc[:, 1]))
                cvcodeap_cyc = np.array(
                    np.rint(cvlf0_cyc[:, 2:3]) * (-np.exp(cvlf0_cyc[:, 3:])))
                f0_rmse_cyc = np.sqrt(np.mean((cvf0_cyc - f0)**2))
                logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc))
                cvf0_cyc_mean = np.mean(cvf0_cyc)
                f0_mean = np.mean(f0)
                f0_corr_cyc = np.sum((cvf0_cyc-cvf0_cyc_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_cyc-cvf0_cyc_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc))

                codeap_rmse_cyc = np.sqrt(
                    np.mean((cvcodeap_cyc - codeap)**2, axis=0))
                for i in range(codeap_rmse_cyc.shape[-1]):
                    logging.info('codeap-%d_rmse_cyc: %lf dB' %
                                 (i + 1, codeap_rmse_cyc[i]))

                # speech-frame indices: LSD is computed on voiced/speech
                # frames only
                spcidx = np.array(read_hdf5(feat_file, "/spcidx_range")[0])

                # invert the log(1 + 10000*x) mel-magnitude compression
                melsp_rest = (np.exp(melsp) - 1) / 10000
                melsp_src_rest = (np.exp(cvmelsp_src) - 1) / 10000
                melsp_cyc_rest = (np.exp(cvmelsp_cyc) - 1) / 10000

                lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_src_rest[spcidx], a_min=1e-16, a_max=None))\
                                                         -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1))
                lsd_mean = np.mean(lsd_arr)
                lsd_std = np.std(lsd_arr)
                logging.info("lsd_rec: %.6f dB +- %.6f" % (lsd_mean, lsd_std))

                lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_cyc_rest[spcidx], a_min=1e-16, a_max=None))\
                                                         -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1))
                lsd_mean_cyc = np.mean(lsd_arr)
                lsd_std_cyc = np.std(lsd_arr)
                logging.info("lsd_cyc: %.6f dB +- %.6f" %
                             (lsd_mean_cyc, lsd_std_cyc))

                logging.info('org f0')
                logging.info(f0[10:15])
                logging.info('rec f0')
                logging.info(cvf0_src[10:15])
                logging.info('cyc f0')
                logging.info(cvf0_cyc[10:15])
                logging.info('org cap')
                logging.info(codeap[10:15])
                logging.info('rec cap')
                logging.info(cvcodeap_src[10:15])
                logging.info('cyc cap')
                logging.info(cvcodeap_cyc[10:15])

                # route metrics to train vs. dev accumulators based on the
                # dataset directory name (e.g. "tr_...", "dv_...")
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    f0rmse_cvlist.append(f0_rmse)
                    f0corr_cvlist.append(f0_corr)
                    caprmse_cvlist.append(codeap_rmse)
                    lsd_cvlist.append(lsd_mean)
                    lsdstd_cvlist.append(lsd_std)
                    cvlist.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist))
                    f0rmse_cvlist_cyc.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc.append(f0_corr_cyc)
                    caprmse_cvlist_cyc.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc.append(lsd_std_cyc)
                    cvlist_cyc.append(np.var(melsp_cyc_rest, axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    f0rmse_cvlist_dv.append(f0_rmse)
                    f0corr_cvlist_dv.append(f0_corr)
                    caprmse_cvlist_dv.append(codeap_rmse)
                    lsd_cvlist_dv.append(lsd_mean)
                    lsdstd_cvlist_dv.append(lsd_std)
                    cvlist_dv.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist_dv))
                    f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc_dv.append(f0_corr_cyc)
                    caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc_dv.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc_dv.append(lsd_std_cyc)
                    cvlist_cyc_dv.append(np.var(melsp_cyc_rest, axis=0))

                # write reconstruction under <root>/<spk>-<spk>/
                logging.info('write rec to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                # write cyclic reconstruction under <root>/<spk>-<spk>-<spk>/
                logging.info('write cyc to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
                #if count >= 5:
                #    break

    # parallel decode training
    with mp.Manager() as manager:
        gpu = 0
        processes = []
        cvlist = manager.list()
        lsd_cvlist = manager.list()
        lsdstd_cvlist = manager.list()
        f0rmse_cvlist = manager.list()
        f0corr_cvlist = manager.list()
        caprmse_cvlist = manager.list()
        cvlist_dv = manager.list()
        lsd_cvlist_dv = manager.list()
        lsdstd_cvlist_dv = manager.list()
        f0rmse_cvlist_dv = manager.list()
        f0corr_cvlist_dv = manager.list()
        caprmse_cvlist_dv = manager.list()
        cvlist_cyc = manager.list()
        lsd_cvlist_cyc = manager.list()
        lsdstd_cvlist_cyc = manager.list()
        f0rmse_cvlist_cyc = manager.list()
        f0corr_cvlist_cyc = manager.list()
        caprmse_cvlist_cyc = manager.list()
        cvlist_cyc_dv = manager.list()
        lsd_cvlist_cyc_dv = manager.list()
        lsdstd_cvlist_cyc_dv = manager.list()
        f0rmse_cvlist_cyc_dv = manager.list()
        f0corr_cvlist_cyc_dv = manager.list()
        caprmse_cvlist_cyc_dv = manager.list()
        for i, feat_list in enumerate(feat_lists):
            logging.info(i)
            p = mp.Process(target=gpu_decode,
                           args=(
                               feat_list,
                               gpu,
                               cvlist,
                               lsd_cvlist,
                               lsdstd_cvlist,
                               cvlist_dv,
                               lsd_cvlist_dv,
                               lsdstd_cvlist_dv,
                               f0rmse_cvlist,
                               f0corr_cvlist,
                               caprmse_cvlist,
                               f0rmse_cvlist_dv,
                               f0corr_cvlist_dv,
                               caprmse_cvlist_dv,
                               cvlist_cyc,
                               lsd_cvlist_cyc,
                               lsdstd_cvlist_cyc,
                               cvlist_cyc_dv,
                               lsd_cvlist_cyc_dv,
                               lsdstd_cvlist_cyc_dv,
                               f0rmse_cvlist_cyc,
                               f0corr_cvlist_cyc,
                               caprmse_cvlist_cyc,
                               f0rmse_cvlist_cyc_dv,
                               f0corr_cvlist_cyc_dv,
                               caprmse_cvlist_cyc_dv,
                           ))
            p.start()
            processes.append(p)
            gpu += 1
            if (i + 1) % args.n_gpus == 0:
                gpu = 0
        # wait for all process
        for p in processes:
            p.join()

        # calculate cv_gv statistics over the training subset
        if len(lsd_cvlist) > 0:
            logging.info("lsd_rec: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist)), \
                        np.std(np.array(lsd_cvlist)),np.mean(np.array(lsdstd_cvlist)),\
                        np.std(np.array(lsdstd_cvlist))))
            cvgv_mean = np.mean(np.array(cvlist), axis=0)
            cvgv_var = np.var(np.array(cvlist), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("f0rmse_rec: %.6f Hz (+- %.6f)" % (np.mean(
                np.array(f0rmse_cvlist)), np.std(np.array(f0rmse_cvlist))))
            logging.info("f0corr_rec: %.6f (+- %.6f)" % (np.mean(
                np.array(f0corr_cvlist)), np.std(np.array(f0corr_cvlist))))
            caprmse_cvlist = np.array(caprmse_cvlist)
            for i in range(caprmse_cvlist.shape[-1]):
                logging.info("caprmse-%d_rec: %.6f dB (+- %.6f)" %
                             (i + 1, np.mean(caprmse_cvlist[:, i]),
                              np.std(caprmse_cvlist[:, i])))
            logging.info("lsd_cyc: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_cyc)), \
                        np.std(np.array(lsd_cvlist_cyc)),np.mean(np.array(lsdstd_cvlist_cyc)),\
                        np.std(np.array(lsdstd_cvlist_cyc))))
            cvgv_mean = np.mean(np.array(cvlist_cyc), axis=0)
            cvgv_var = np.var(np.array(cvlist_cyc), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("f0rmse_cyc: %.6f Hz (+- %.6f)" %
                         (np.mean(np.array(f0rmse_cvlist_cyc)),
                          np.std(np.array(f0rmse_cvlist_cyc))))
            logging.info("f0corr_cyc: %.6f (+- %.6f)" %
                         (np.mean(np.array(f0corr_cvlist_cyc)),
                          np.std(np.array(f0corr_cvlist_cyc))))
            caprmse_cvlist_cyc = np.array(caprmse_cvlist_cyc)
            for i in range(caprmse_cvlist_cyc.shape[-1]):
                logging.info("caprmse-%d_cyc: %.6f dB (+- %.6f)" %
                             (i + 1, np.mean(caprmse_cvlist_cyc[:, i]),
                              np.std(caprmse_cvlist_cyc[:, i])))

            # combined rec+cyc GV statistics are what get stored to the
            # speaker stats file
            cvgv_mean = np.mean(np.array(np.r_[cvlist, cvlist_cyc]), axis=0)
            cvgv_var = np.var(np.array(np.r_[cvlist, cvlist_cyc]), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))

            string_path = model_name+"-"+str(config.n_half_cyc)+"-"+str(config.lat_dim)+"-"+str(config.lat_dim_e)\
                            +"-"+str(config.spkidtr_dim)+"-"+model_epoch
            logging.info(string_path)

            string_mean = "/recgv_mean_" + string_path
            string_var = "/recgv_var_" + string_path
            write_hdf5(spk_stat, string_mean, cvgv_mean)
            write_hdf5(spk_stat, string_var, cvgv_var)

        # same statistics over the development subset (nothing stored to h5)
        if len(lsd_cvlist_dv) > 0:
            logging.info("lsd_rec_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_dv)), \
                        np.std(np.array(lsd_cvlist_dv)),np.mean(np.array(lsdstd_cvlist_dv)),\
                        np.std(np.array(lsdstd_cvlist_dv))))
            cvgv_mean = np.mean(np.array(cvlist_dv), axis=0)
            cvgv_var = np.var(np.array(cvlist_dv), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("f0rmse_rec_dv: %.6f Hz (+- %.6f)" %
                         (np.mean(np.array(f0rmse_cvlist_dv)),
                          np.std(np.array(f0rmse_cvlist_dv))))
            logging.info("f0corr_rec_dv: %.6f (+- %.6f)" %
                         (np.mean(np.array(f0corr_cvlist_dv)),
                          np.std(np.array(f0corr_cvlist_dv))))
            caprmse_cvlist_dv = np.array(caprmse_cvlist_dv)
            # fixed: iterate over the dv array's own width (previously
            # indexed caprmse_cvlist, which may not even be an ndarray here)
            for i in range(caprmse_cvlist_dv.shape[-1]):
                logging.info("caprmse-%d_rec_dv: %.6f dB (+- %.6f)" %
                             (i + 1, np.mean(caprmse_cvlist_dv[:, i]),
                              np.std(caprmse_cvlist_dv[:, i])))
            logging.info("lsd_cyc_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_cyc_dv)), \
                        np.std(np.array(lsd_cvlist_cyc_dv)),np.mean(np.array(lsdstd_cvlist_cyc_dv)),\
                        np.std(np.array(lsdstd_cvlist_cyc_dv))))
            cvgv_mean = np.mean(np.array(cvlist_cyc_dv), axis=0)
            cvgv_var = np.var(np.array(cvlist_cyc_dv), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("f0rmse_cyc_dv: %.6f Hz (+- %.6f)" %
                         (np.mean(np.array(f0rmse_cvlist_cyc_dv)),
                          np.std(np.array(f0rmse_cvlist_cyc_dv))))
            logging.info("f0corr_cyc_dv: %.6f (+- %.6f)" %
                         (np.mean(np.array(f0corr_cvlist_cyc_dv)),
                          np.std(np.array(f0corr_cvlist_cyc_dv))))
            caprmse_cvlist_cyc_dv = np.array(caprmse_cvlist_cyc_dv)
            for i in range(caprmse_cvlist_cyc_dv.shape[-1]):
                logging.info("caprmse-%d_cyc_dv: %.6f dB (+- %.6f)" %
                             (i + 1, np.mean(caprmse_cvlist_cyc_dv[:, i]),
                              np.std(caprmse_cvlist_cyc_dv[:, i])))
Example #20
0
def main():
    """Compute feature-normalization statistics over a list of HDF5 files.

    Accumulates mean/scale of the joint feature vector with an incremental
    StandardScaler, per-utterance mel-cepstrum variances (for global-variance
    statistics), and voiced-frame F0 / log-F0 statistics, then writes all of
    them to the stats HDF5 file given by ``--stats``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--feats",
                        default=None,
                        required=True,
                        help="name of the list of hdf5 files")
    parser.add_argument("--stats",
                        default=None,
                        required=True,
                        help="filename of hdf5 format")
    parser.add_argument("--expdir",
                        required=True,
                        type=str,
                        help="directory to save the log")
    parser.add_argument("--stdim",
                        default=5,
                        type=int,
                        help="start dimension of the mel-cepstrum part "
                             "of the feature vector")
    parser.add_argument("--spkr",
                        default=None,
                        type=str,
                        help="speaker name")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")

    args = parser.parse_args()

    # set log level; single basicConfig instead of three duplicated blocks
    if args.verbose == 1:
        level = logging.INFO
    elif args.verbose > 1:
        level = logging.DEBUG
    else:
        level = logging.WARN
    logging.basicConfig(
        level=level,
        format=
        '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S',
        filename=args.expdir + "/calc_stats.log")
    logging.getLogger().addHandler(logging.StreamHandler())
    if level == logging.WARN:
        # logging.warn is deprecated; use logging.warning
        logging.warning("logging is disabled.")

    # read list and define scaler
    filenames = read_txt(args.feats)
    scaler_feat_org_lf0 = StandardScaler()
    logging.info("number of training utterances = " + str(len(filenames)))

    var_range = []  # per-utterance mel-cepstrum variances (GV statistics)
    f0s_range = np.empty(0)  # pooled voiced F0 values over all utterances
    # process over all of data
    for filename in filenames:
        logging.info(filename)
        feat_org_lf0 = read_hdf5(filename, "/feat_org_lf0")
        scaler_feat_org_lf0.partial_fit(feat_org_lf0)
        # mel-cepstrum part starts at args.stdim; earlier dims are excitation
        mcep_range = feat_org_lf0[:, args.stdim:]
        var_range.append(np.var(mcep_range, axis=0))
        logging.info(mcep_range.shape)
        # prefer trimmed-range F0 if stored, otherwise fall back to full F0
        if check_hdf5(filename, "/f0_range"):
            f0_range = read_hdf5(filename, "/f0_range")
        else:
            f0_range = read_hdf5(filename, "/f0")
        # keep only voiced frames (F0 != 0)
        nonzero_indices = np.nonzero(f0_range)
        logging.info(f0_range[nonzero_indices].shape)
        logging.info(f0s_range.shape)
        f0s_range = np.concatenate([f0s_range, f0_range[nonzero_indices]])
        logging.info(f0s_range.shape)

    mean_feat_org_lf0 = scaler_feat_org_lf0.mean_
    scale_feat_org_lf0 = scaler_feat_org_lf0.scale_
    gv_range_mean = np.mean(np.array(var_range), axis=0)
    gv_range_var = np.var(np.array(var_range), axis=0)
    logging.info(gv_range_mean)
    logging.info(gv_range_var)
    f0_range_mean = np.mean(f0s_range)
    f0_range_std = np.std(f0s_range)
    logging.info(f0_range_mean)
    logging.info(f0_range_std)
    # compute log-F0 once instead of twice
    lf0s_range = np.log(f0s_range)
    lf0_range_mean = np.mean(lf0s_range)
    lf0_range_std = np.std(lf0s_range)
    logging.info(lf0_range_mean)
    logging.info(lf0_range_std)
    # sanity logs: mean/std of F0 are NOT exp of mean/std of log-F0 in general
    logging.info(np.array_equal(f0_range_mean, np.exp(lf0_range_mean)))
    logging.info(np.array_equal(f0_range_std, np.exp(lf0_range_std)))

    logging.info(mean_feat_org_lf0)
    logging.info(scale_feat_org_lf0)
    write_hdf5(args.stats, "/mean_feat_org_lf0", mean_feat_org_lf0)
    write_hdf5(args.stats, "/scale_feat_org_lf0", scale_feat_org_lf0)
    write_hdf5(args.stats, "/gv_range_mean", gv_range_mean)
    write_hdf5(args.stats, "/gv_range_var", gv_range_var)
    write_hdf5(args.stats, "/f0_range_mean", f0_range_mean)
    write_hdf5(args.stats, "/f0_range_std", f0_range_std)
    write_hdf5(args.stats, "/lf0_range_mean", lf0_range_mean)
    write_hdf5(args.stats, "/lf0_range_std", lf0_range_std)
Example #21
0
    modulo_index = 0
    if len(sys.argv) > 3:
        modulo_index = max(0, int(sys.argv[3]))
        print('[Data] modulo index %d' % modulo_index)

    config_files = [config_file for config_file in os.listdir(config_folder)]
    config = utils.read_json(config_folder + config_files[-1])

    scaled_directory = config['scaled_directory'] + '/'
    assert os.path.exists(
        scaled_directory), 'directory %s does not exist' % scaled_directory

    depth_directory = config['depth_directory'] + '/'
    utils.makedir(depth_directory)

    off_files = utils.read_ordered_directory(scaled_directory)
    timer = Timer()

    Rs = get_views(config['watertight_rendering']['n_views'])

    for n in range(len(off_files)):
        if (n - modulo_index) % modulo_base == 0:
            timer.reset()
            mesh = Mesh.from_off(off_files[n])
            depths = render(mesh, Rs)

            depth_file = depth_directory + '%d.hdf5' % n
            utils.write_hdf5(depth_file, np.array(depths))
            print('[Data] wrote %s (%f seconds)' %
                  (depth_file, timer.elapsed()))
        truncation = config['truncation']
        sdfs = utils.read_hdf5(common.filename(config, 'sdf_file'))

        tsdfs = sdfs.copy()
        tsdfs[tsdfs > truncation] = truncation
        tsdfs[tsdfs < -truncation] = -truncation

        ltsdfs = tsdfs.copy()
        ltsdfs[ltsdfs > 0] = np.log(ltsdfs[ltsdfs > 0] + 1)
        ltsdfs[ltsdfs < 0] = -np.log(np.abs(ltsdfs[ltsdfs < 0]) + 1)

        tsdf_file = common.filename(config, 'tsdf_file')
        ltsdf_file = common.filename(config, 'ltsdf_file')

        utils.write_hdf5(tsdf_file, tsdfs)
        print('[Data] wrote ' + tsdf_file)
        utils.write_hdf5(ltsdf_file, ltsdfs)
        print('[Data] wrote ' + ltsdf_file)

    config_files = [
        config_file for config_file in os.listdir(config_folder)
        if config_file.find('prior') < 0
    ]
    for config_file in config_files:
        print('[Data] reading ' + config_folder + config_file)
        config = utils.read_json(config_folder + config_file)

        input_sdfs = utils.read_hdf5(common.filename(config, 'input_sdf_file'))

        input_tsdfs = input_sdfs.copy()
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None):
        """Reconstruct mel-cepstra for every file in feat_list on one GPU.

        Encodes each utterance to latent features, decodes them conditioned
        on a one-hot code for the same speaker (reconstruction), computes
        mel-cepstral distortion against the original, appends the stats to
        the given result lists, and writes the power-corrected reconstruction
        back to HDF5.

        Relies on closure variables from the enclosing scope: args, config,
        n_spk, spk_code_idx, spk, model_epoch, IRLEN.
        NOTE(review): the *_cvlist arguments appear to be shared lists filled
        across worker processes -- confirm against the caller.
        """
        with torch.cuda.device(gpu):
            # normalization stats of the mel-cepstrum part only
            # (the first config.stdim dims of the feature are excitation)
            mean_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/mean_feat_org_lf0")[config.stdim:]).cuda()
            std_jnt = torch.FloatTensor(
                read_hdf5(args.stats_jnt,
                          "/scale_feat_org_lf0")[config.stdim:]).cuda()
            # define model and load parameters
            logging.info("model")
            logging.info(config)
            with torch.no_grad():
                model_encoder = GRU_RNN_STOCHASTIC(
                    in_dim=config.in_dim,
                    out_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers,
                    hidden_units=config.hidden_units,
                    kernel_size=config.kernel_size,
                    dilation_size=config.dilation_size,
                    spk_dim=n_spk,
                    scale_out_flag=False)
                model_decoder = GRU_RNN(in_dim=config.lat_dim + n_spk,
                                        out_dim=config.out_dim,
                                        hidden_layers=config.hidden_layers,
                                        hidden_units=config.hidden_units,
                                        kernel_size=config.kernel_size,
                                        dilation_size=config.dilation_size,
                                        scale_in_flag=False)
                logging.info(model_encoder)
                logging.info(model_decoder)
                model_encoder.load_state_dict(
                    torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(
                    torch.load(args.model)["model_decoder"])
                model_encoder.cuda()
                model_decoder.cuda()
                model_encoder.eval()
                model_decoder.eval()
                # inference only: freeze all parameters
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                # initial recurrent inputs; y_in_src is the normalized zero
                # vector used as the decoder's first autoregressive input
                init_pp = np.zeros((1, 1, config.lat_dim * 2 + n_spk))
                y_in_pp = torch.FloatTensor(init_pp).cuda()
                y_in_src = torch.unsqueeze(
                    torch.unsqueeze((0 - mean_jnt) / std_jnt, 0), 0)
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat = read_hdf5(feat_file, "/feat_org_lf0")
                logging.info(feat.shape)
                with torch.no_grad():
                    # encode to latent features, then decode with a one-hot
                    # speaker code (same speaker -> reconstruction)
                    lat_feat, _, _, _, _ = model_encoder(torch.FloatTensor(feat).cuda(), \
                                                        y_in_pp, sampling=False)
                    spk_code = np.zeros((lat_feat.shape[0], n_spk))
                    spk_code[:, spk_code_idx] = 1
                    spk_code = torch.FloatTensor(spk_code).cuda()
                    cvmcep, _, _ = model_decoder(
                        torch.cat((spk_code, lat_feat), 1), y_in_src)

                cvmcep = np.array(cvmcep.cpu().data.numpy(), dtype=np.float64)
                logging.info(cvmcep.shape)

                # MCD over speech frames only (spcidx_range); the "pow"
                # variant includes the 0th (power) coefficient
                mcep = feat[:, config.stdim:]
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]
                _, mcdpow_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),:], dtype=np.float64))
                _, mcd_arr = dtw.calc_mcd(np.array(mcep[np.array(spcidx),1:], dtype=np.float64), \
                                            np.array(cvmcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                # route stats to train vs dev lists based on directory name
                # (assumes feat paths look like <root>/tr*_.../... -- confirm)
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                # match the power contour of the converted mcep to the source
                logging.info("mod_pow")
                cvmcep = mod_pow(cvmcep,
                                 mcep,
                                 alpha=args.mcep_alpha,
                                 irlen=IRLEN)
                logging.info(cvmcep.shape)
                # re-attach the untouched excitation dims before saving
                feat_cvmcep = np.c_[feat[:, :config.stdim], cvmcep]
                logging.info(feat_cvmcep.shape)
                write_path = '/feat_recmcep_cycvae-' + model_epoch
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    spk + "-" + spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + write_path)
                write_hdf5(feat_file, write_path, feat_cvmcep)
Example #24
0
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None, \
                    cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None, mcd_cvlist_cyc=None, \
                    mcdstd_cvlist_cyc=None, cvlist_cyc_dv=None, mcdpow_cvlist_cyc_dv=None, mcdpowstd_cvlist_cyc_dv=None, \
                    mcd_cvlist_cyc_dv=None, mcdstd_cvlist_cyc_dv=None):
        """Reconstruct and cyclically reconstruct mel-cepstra on one GPU.

        For each feature file: encoder -> decoder -> post-net gives the
        reconstruction ("rec"); re-encoding the decoder output and running
        decoder -> post-net again gives the cyclic reconstruction ("cyc").
        MCD statistics for both are appended to the given result lists
        (train vs dev), and both feature variants are written back to HDF5.

        Relies on closure variables from the enclosing scope: args, config,
        n_spk, spk_idx.
        NOTE(review): the *_cvlist arguments appear to be shared lists filled
        across worker processes -- confirm against the caller.
        """
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim + config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder)
                model_decoder = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_dec)
                logging.info(model_decoder)
                model_post = GRU_POST_NET(
                    spec_dim=config.mcep_dim,
                    excit_dim=2,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_post,
                    hidden_units=config.hidden_units_post,
                    kernel_size=config.kernel_size_post,
                    dilation_size=config.dilation_size_post,
                    causal_conv=config.causal_conv_post,
                    pad_first=True,
                    right_size=config.right_size_post)
                #excit_dim=config.excit_dim,
                #excit_dim=None,
                logging.info(model_post)
                model_encoder.load_state_dict(
                    torch.load(args.model)["model_encoder"])
                model_decoder.load_state_dict(
                    torch.load(args.model)["model_decoder"])
                model_post.load_state_dict(
                    torch.load(args.model)["model_post"])
                model_encoder.remove_weight_norm()
                model_decoder.remove_weight_norm()
                model_post.remove_weight_norm()
                model_encoder.cuda()
                model_decoder.cuda()
                model_post.cuda()
                model_encoder.eval()
                model_decoder.eval()
                model_post.eval()
                # inference only: freeze all parameters
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                for param in model_post.parameters():
                    param.requires_grad = False
            count = 0
            # total padding covers TWO passes through enc->dec->post
            # (reconstruction + cyclic reconstruction), hence the * 2
            pad_left = (model_encoder.pad_left + model_decoder.pad_left +
                        model_post.pad_left) * 2
            pad_right = (model_encoder.pad_right + model_decoder.pad_right +
                         model_post.pad_right) * 2
            # outpad_lefts[i]/outpad_rights[i] = padding still remaining after
            # the i-th model stage (enc, dec, post, enc again, dec again)
            outpad_lefts = [None] * 5
            outpad_rights = [None] * 5
            outpad_lefts[0] = pad_left - model_encoder.pad_left
            outpad_rights[0] = pad_right - model_encoder.pad_right
            outpad_lefts[1] = outpad_lefts[0] - model_decoder.pad_left
            outpad_rights[1] = outpad_rights[0] - model_decoder.pad_right
            outpad_lefts[2] = outpad_lefts[1] - model_post.pad_left
            outpad_rights[2] = outpad_rights[1] - model_post.pad_right
            outpad_lefts[3] = outpad_lefts[2] - model_encoder.pad_left
            outpad_rights[3] = outpad_rights[2] - model_encoder.pad_right
            outpad_lefts[4] = outpad_lefts[3] - model_decoder.pad_left
            outpad_rights[4] = outpad_rights[3] - model_decoder.pad_right
            logging.info(f'{pad_left} {pad_right}')
            logging.info(outpad_lefts)
            logging.info(outpad_rights)
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat_org = read_hdf5(feat_file, "/feat_mceplf0cap")
                logging.info(feat_org.shape)
                # last mcep_dim columns are the mel-cepstrum (reference)
                mcep = np.array(feat_org[:, -config.mcep_dim:])

                with torch.no_grad():
                    # replicate-pad the full feature once for both passes
                    feat = F.pad(
                        torch.FloatTensor(feat_org).cuda().unsqueeze(
                            0).transpose(1, 2), (pad_left, pad_right),
                        "replicate").transpose(1, 2)
                    feat_excit = torch.FloatTensor(
                        feat_org[:, :config.excit_dim]).cuda().unsqueeze(0)

                    spk_logits, _, lat_src, _ = model_encoder(feat,
                                                              sampling=False)
                    # log mean speaker posterior over the valid region
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:
                                                     -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:],
                                          dim=-1), 1))

                    # decode + post-net with the same speaker id (rec pass)
                    cvmcep_src, _ = model_decoder((torch.ones(
                        (1, lat_src.shape[1])) * spk_idx).cuda().long(),
                                                  lat_src)
                    cvmcep_src_post, _ = model_post(
                        cvmcep_src,
                        y=(torch.ones(
                            (1, cvmcep_src.shape[1])) * spk_idx).cuda().long(),
                        e=F.pad(feat_excit[:, :, :2].transpose(1, 2),
                                (outpad_lefts[1], outpad_rights[1]),
                                "replicate").transpose(1, 2))
                    #e=F.pad(feat_excit.transpose(1,2), (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2))
                    # re-encode excitation + reconstructed mcep (cyc pass)
                    if model_post.pad_right > 0:
                        spk_logits, _, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep_src[:,model_post.pad_left:-model_post.pad_right]), 2),
                                                            sampling=False)
                    else:
                        spk_logits, _, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[2],outpad_rights[2]), "replicate").transpose(1,2), cvmcep_src[:,model_post.pad_left:]), 2),
                                                            sampling=False)
                    logging.info('rec spkpost')
                    if outpad_rights[3] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:
                                                     -outpad_rights[3]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:],
                                          dim=-1), 1))

                    cvmcep_cyc, _ = model_decoder((torch.ones(
                        (1, lat_rec.shape[1])) * spk_idx).cuda().long(),
                                                  lat_rec)
                    cvmcep_cyc_post, _ = model_post(
                        cvmcep_cyc,
                        y=(torch.ones(
                            (1, cvmcep_cyc.shape[1])) * spk_idx).cuda().long(),
                        e=F.pad(feat_excit[:, :, :2].transpose(1, 2),
                                (outpad_lefts[4], outpad_rights[4]),
                                "replicate").transpose(1, 2))
                    #e=F.pad(feat_excit.transpose(1,2), (outpad_lefts[4],outpad_rights[4]), "replicate").transpose(1,2))

                    # strip remaining padding and prepend excitation dims
                    if outpad_rights[2] > 0:
                        feat_rec = torch.cat(
                            (feat_excit,
                             cvmcep_src_post[:,
                                             outpad_lefts[2]:-outpad_rights[2]]
                             ), 2)[0].cpu().data.numpy()
                    else:
                        feat_rec = torch.cat(
                            (feat_excit, cvmcep_src_post[:, outpad_lefts[2]:]),
                            2)[0].cpu().data.numpy()
                    feat_cyc = torch.cat((feat_excit, cvmcep_cyc_post),
                                         2)[0].cpu().data.numpy()

                    if outpad_rights[2] > 0:
                        cvmcep_src = np.array(
                            cvmcep_src_post[:,
                                            outpad_lefts[2]:-outpad_rights[2]]
                            [0].cpu().data.numpy(),
                            dtype=np.float64)
                    else:
                        cvmcep_src = np.array(cvmcep_src_post[:,
                                                              outpad_lefts[2]:]
                                              [0].cpu().data.numpy(),
                                              dtype=np.float64)
                    cvmcep_cyc = np.array(
                        cvmcep_cyc_post[0].cpu().data.numpy(),
                        dtype=np.float64)

                logging.info(cvmcep_src.shape)
                logging.info(cvmcep_cyc.shape)

                # MCD of rec/cyc against the original, speech frames only;
                # "pow" includes the 0th (power) coefficient
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean_cyc = np.mean(mcdpow_arr)
                mcdpow_std_cyc = np.std(mcdpow_arr)
                mcd_mean_cyc = np.mean(mcd_arr)
                mcd_std_cyc = np.std(mcd_arr)
                logging.info("mcdpow_cyc: %.6f dB +- %.6f" %
                             (mcdpow_mean_cyc, mcdpow_std_cyc))
                logging.info("mcd_cyc: %.6f dB +- %.6f" %
                             (mcd_mean_cyc, mcd_std_cyc))

                # route stats to train vs dev lists based on directory name
                # (assumes feat paths look like <root>/tr*_.../... -- confirm)
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist))
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc.append(mcd_std_cyc)
                    cvlist_cyc.append(np.var(cvmcep_cyc[:, 1:], axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc_dv.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc_dv.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc_dv.append(mcd_std_cyc)
                    cvlist_cyc_dv.append(np.var(cvmcep_cyc[:, 1:], axis=0))

                # save reconstruction under <spk>-<spk>
                logging.info('write rec to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                # save cyclic reconstruction under <spk>-<spk>-<spk>
                logging.info('write cyc to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
Example #25
0
    data = data.reshape((shape[0], np.prod(np.array(shape[1:]))))
    print('[Validation] reshaped data ' + 'x'.join(map(str, data.shape)))

    mean_file = args.mean_file
    V_file = args.V_file
    var_file = args.var_file

    mean = utils.read_hdf5(mean_file)
    print('[Validation] read ' + mean_file)
    V = utils.read_hdf5(V_file)
    print('[Validation] read ' + V_file)
    var = utils.read_hdf5(var_file)[0]
    print('[Validation] read ' + var_file)
    print('[Validation] var is ' + str(var))

    I = np.eye(V.shape[1])
    M = V.T.dot(V) + I*var
    M_inv = np.linalg.inv(M)

    means = np.repeat(mean.reshape((mean.shape[0], 1)), data.shape[0], axis = 1)
    codes = M_inv.dot(V.T.dot(data.T - means))

    code_mean = np.mean(codes)
    code_var = np.var(codes)
    print('[Validation] codes: ' + str(code_mean) + ' / ' + str(code_var))

    preds = np.dot(V, codes) + means
    preds = preds.T

    utils.write_hdf5(args.output, preds.reshape(shape))
    print('[Validation] wrote ' + args.output)
def main():
    """Compute normalization and global-variance (GV) statistics.

    Reads every HDF5 feature file listed in ``--feats`` (split across
    ``--n_jobs`` worker processes), accumulates mel-cepstral/excitation
    features, voiced F0 values and mel-spectrograms, then writes the
    mean/scale and GV statistics to the HDF5 file given by ``--stats``.
    A log file ``calc_stats.log`` is written under ``--expdir``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--feats",
                        default=None,
                        required=True,
                        help="name of the list of hdf5 files")
    parser.add_argument("--stats",
                        default=None,
                        required=True,
                        help="filename of stats for hdf5 format")
    parser.add_argument("--expdir",
                        required=True,
                        type=str,
                        help="directory to save the log")
    parser.add_argument("--mcep_dim",
                        default=50,
                        type=int,
                        help="dimension of mel-cepstrum")
    parser.add_argument("--n_jobs",
                        default=10,
                        type=int,
                        help="number of parallel jobs")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")

    args = parser.parse_args()

    # set log level (1 -> INFO, >1 -> DEBUG, otherwise WARN); the original
    # three branches only differed in the level, so configure once here
    if args.verbose == 1:
        level = logging.INFO
    elif args.verbose > 1:
        level = logging.DEBUG
    else:
        level = logging.WARN
    logging.basicConfig(
        level=level,
        format=
        '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S',
        filename=args.expdir + "/calc_stats.log")
    logging.getLogger().addHandler(logging.StreamHandler())
    if level == logging.WARN:
        # logging.warn() is deprecated; use logging.warning()
        logging.warning("logging is disabled.")

    # read list of feature files
    filenames = read_txt(args.feats)
    logging.info("number of training utterances = " + str(len(filenames)))

    def _vstack(acc, block):
        # Stack `block` under the running array `acc` (start it if empty).
        return block if acc is None else np.r_[acc, block]

    def calc_stats(filenames, cpu, feat_mceplf0cap_list, feat_orglf0_list,
                   varmcep_list, f0_list, melsp_list, varmelsp_list,
                   melworldsp_list, varmelworldsp_list):
        """Worker: accumulate features over `filenames` and append the
        resulting per-worker arrays to the shared manager lists."""
        feat_mceplf0cap_arr = None
        feat_orglf0_arr = None
        varmcep_arr = None
        f0_arr = None
        melsp_arr = None
        varmelsp_arr = None
        melworldsp_arr = None
        varmelworldsp_arr = None
        count = 0
        # process over all of data
        for filename in filenames:
            logging.info(filename)
            feat_mceplf0cap = read_hdf5(filename, "/feat_mceplf0cap")
            logging.info(feat_mceplf0cap.shape)
            feat_orglf0 = read_hdf5(filename, "/feat_org_lf0")
            logging.info(feat_orglf0.shape)
            melsp = read_hdf5(filename, "/log_1pmelmagsp")
            logging.info(melsp.shape)
            melworldsp = read_hdf5(filename, "/log_1pmelworldsp")
            logging.info(melworldsp.shape)

            feat_mceplf0cap_arr = _vstack(feat_mceplf0cap_arr,
                                          feat_mceplf0cap)
            feat_orglf0_arr = _vstack(feat_orglf0_arr, feat_orglf0)
            logging.info('feat')
            logging.info(feat_mceplf0cap_arr.shape)
            logging.info(feat_orglf0_arr.shape)

            # per-utterance variance of the mel-cepstrum part (last
            # mcep_dim columns), later summarized into GV statistics
            varmcep_arr = _vstack(
                varmcep_arr,
                np.var(feat_mceplf0cap[:, -args.mcep_dim:],
                       axis=0,
                       keepdims=True))
            logging.info('var')
            logging.info(varmcep_arr.shape)

            logging.info('f0')
            f0 = read_hdf5(filename, "/f0_range")
            logging.info(f0.shape)
            logging.info('f0 > 0')
            # keep only voiced frames (non-zero F0)
            f0 = f0[np.nonzero(f0)]
            logging.info(f0.shape)
            f0_arr = _vstack(f0_arr, f0)
            logging.info(f0_arr.shape)

            melsp_arr = _vstack(melsp_arr, melsp)
            logging.info(melsp_arr.shape)
            # variance taken in the linear-magnitude domain by inverting
            # the log(1 + 10000*x) compression the features were stored with
            varmelsp_arr = _vstack(
                varmelsp_arr,
                np.var((np.exp(melsp) - 1) / 10000, axis=0, keepdims=True))
            logging.info('var melsp')
            logging.info(varmelsp_arr.shape)

            melworldsp_arr = _vstack(melworldsp_arr, melworldsp)
            logging.info(melworldsp_arr.shape)
            varmelworldsp_arr = _vstack(
                varmelworldsp_arr,
                np.var((np.exp(melworldsp) - 1) / 10000,
                       axis=0,
                       keepdims=True))
            logging.info('var melworldsp')
            logging.info(varmelworldsp_arr.shape)

            count += 1
            logging.info("cpu %d %d %d %d %d %d %d %d %d %d" %
                         (cpu, count, len(feat_mceplf0cap_arr),
                          len(feat_orglf0_arr), len(varmcep_arr), len(f0_arr),
                          len(melsp_arr), len(varmelsp_arr),
                          len(melworldsp_arr), len(varmelworldsp_arr)))

        feat_mceplf0cap_list.append(feat_mceplf0cap_arr)
        feat_orglf0_list.append(feat_orglf0_arr)
        varmcep_list.append(varmcep_arr)
        f0_list.append(f0_arr)
        melsp_list.append(melsp_arr)
        varmelsp_list.append(varmelsp_arr)
        melworldsp_list.append(melworldsp_arr)
        varmelworldsp_list.append(varmelworldsp_arr)

    def _gather(result_list, label):
        """Concatenate the non-empty per-worker arrays into one array."""
        arr = None
        for i in range(len(result_list)):
            if result_list[i] is not None:
                logging.info(i)
                logging.info(result_list[i].shape)
                arr = _vstack(arr, result_list[i])
        logging.info('%s: %d' % (label, len(arr)))
        logging.info(arr.shape)
        return arr

    # divide the file list across workers
    feat_lists = [f_list.tolist()
                  for f_list in np.array_split(filenames, args.n_jobs)]
    for i in range(len(feat_lists)):
        logging.info("%d %d" % (i + 1, len(feat_lists[i])))

    # multi processing: each worker appends its accumulated arrays to the
    # manager-backed shared lists
    with mp.Manager() as manager:
        processes = []
        feat_mceplf0cap_list = manager.list()
        feat_orglf0_list = manager.list()
        varmcep_list = manager.list()
        f0_list = manager.list()
        melsp_list = manager.list()
        varmelsp_list = manager.list()
        melworldsp_list = manager.list()
        varmelworldsp_list = manager.list()
        for i, feat_list in enumerate(feat_lists):
            p = mp.Process(target=calc_stats,
                           args=(
                               feat_list,
                               i + 1,
                               feat_mceplf0cap_list,
                               feat_orglf0_list,
                               varmcep_list,
                               f0_list,
                               melsp_list,
                               varmelsp_list,
                               melworldsp_list,
                               varmelworldsp_list,
                           ))
            p.start()
            processes.append(p)

        # wait for all workers
        for p in processes:
            p.join()

        # merge per-worker results into single arrays
        feat_mceplf0cap = _gather(feat_mceplf0cap_list, 'feat mceplf0cap')
        feat_orglf0 = _gather(feat_orglf0_list, 'feat orglf0')
        var_range = _gather(varmcep_list, 'var mcep')
        f0s_range = _gather(f0_list, 'f0')
        melsp = _gather(melsp_list, 'melsp')
        var_melsp = _gather(varmelsp_list, 'var melsp')
        melworldsp = _gather(melworldsp_list, 'melworldsp')
        var_melworldsp = _gather(varmelworldsp_list, 'var melworldsp')

        # mean/scale statistics of the mcep+lf0+cap features and the
        # original lf0 features
        scaler_feat_mceplf0cap = StandardScaler()
        scaler_feat_orglf0 = StandardScaler()
        logging.info(feat_mceplf0cap.shape)
        scaler_feat_mceplf0cap.partial_fit(feat_mceplf0cap)
        scaler_feat_orglf0.partial_fit(feat_orglf0)

        logging.info(melsp.shape)
        scaler_melsp = StandardScaler()
        scaler_melsp.partial_fit(melsp)

        mean_feat_mceplf0cap = scaler_feat_mceplf0cap.mean_
        scale_feat_mceplf0cap = scaler_feat_mceplf0cap.scale_
        mean_feat_orglf0 = scaler_feat_orglf0.mean_
        scale_feat_orglf0 = scaler_feat_orglf0.scale_

        # GV statistics of the mel-cepstrum variances
        gv_range_mean = np.mean(np.array(var_range), axis=0)
        gv_range_var = np.var(np.array(var_range), axis=0)
        logging.info(gv_range_mean)
        logging.info(gv_range_var)
        # F0 statistics over voiced frames, in Hz and in log-Hz
        f0_range_mean = np.mean(f0s_range)
        f0_range_std = np.std(f0s_range)
        logging.info(f0_range_mean)
        logging.info(f0_range_std)
        lf0_range_mean = np.mean(np.log(f0s_range))
        lf0_range_std = np.std(np.log(f0s_range))
        logging.info(lf0_range_mean)
        logging.info(lf0_range_std)

        logging.info(mean_feat_mceplf0cap)
        logging.info(scale_feat_mceplf0cap)
        write_hdf5(args.stats, "/mean_feat_mceplf0cap", mean_feat_mceplf0cap)
        write_hdf5(args.stats, "/scale_feat_mceplf0cap", scale_feat_mceplf0cap)
        logging.info(mean_feat_orglf0)
        logging.info(scale_feat_orglf0)
        write_hdf5(args.stats, "/mean_feat_org_lf0", mean_feat_orglf0)
        write_hdf5(args.stats, "/scale_feat_org_lf0", scale_feat_orglf0)
        write_hdf5(args.stats, "/gv_range_mean", gv_range_mean)
        write_hdf5(args.stats, "/gv_range_var", gv_range_var)
        write_hdf5(args.stats, "/f0_range_mean", f0_range_mean)
        write_hdf5(args.stats, "/f0_range_std", f0_range_std)
        write_hdf5(args.stats, "/lf0_range_mean", lf0_range_mean)
        write_hdf5(args.stats, "/lf0_range_std", lf0_range_std)

        # mel-spectrogram mean/scale and GV statistics
        mean_melsp = scaler_melsp.mean_
        scale_melsp = scaler_melsp.scale_
        gv_melsp_mean = np.mean(np.array(var_melsp), axis=0)
        gv_melsp_var = np.var(np.array(var_melsp), axis=0)
        logging.info(gv_melsp_mean)
        logging.info(gv_melsp_var)
        logging.info(mean_melsp)
        logging.info(scale_melsp)
        write_hdf5(args.stats, "/mean_melsp", mean_melsp)
        write_hdf5(args.stats, "/scale_melsp", scale_melsp)
        write_hdf5(args.stats, "/gv_melsp_mean", gv_melsp_mean)
        write_hdf5(args.stats, "/gv_melsp_var", gv_melsp_var)

        # WORLD-based mel-spectrogram mean/scale and GV statistics
        scaler_melworldsp = StandardScaler()
        scaler_melworldsp.partial_fit(melworldsp)
        mean_melworldsp = scaler_melworldsp.mean_
        scale_melworldsp = scaler_melworldsp.scale_
        gv_melworldsp_mean = np.mean(np.array(var_melworldsp), axis=0)
        gv_melworldsp_var = np.var(np.array(var_melworldsp), axis=0)
        logging.info(gv_melworldsp_mean)
        logging.info(gv_melworldsp_var)
        logging.info(mean_melworldsp)
        logging.info(scale_melworldsp)
        write_hdf5(args.stats, "/mean_melworldsp", mean_melworldsp)
        write_hdf5(args.stats, "/scale_melworldsp", scale_melworldsp)
        write_hdf5(args.stats, "/gv_melworldsp_mean", gv_melworldsp_mean)
        write_hdf5(args.stats, "/gv_melworldsp_var", gv_melworldsp_var)
import os
import sys
sys.path.insert(1, os.path.realpath('../lib/py/'))
import utils
import argparse
import numpy as np


def get_parser():
    """
    Build the command-line parser for this script.

    Accepts two integer options, ``--code_size`` and ``--number``.

    :return: parser
    :rtype: argparse.ArgumentParser
    """

    arg_parser = argparse.ArgumentParser()
    # both options share the same type, so register them in one pass
    for flag in ('--code_size', '--number'):
        arg_parser.add_argument(flag, type=int)

    return arg_parser


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()

    # Sample a (code_size x number) matrix of standard-normal random codes
    # and save it to an HDF5 file named after its dimensions,
    # e.g. codes_10_100.h5.
    codes = np.random.randn(args.code_size, args.number)
    utils.write_hdf5(
        'codes_' + str(args.code_size) + '_' + str(args.number) + '.h5', codes)
Example #28
0
    def gpu_decode(feat_list,
                   gpu,
                   cvlist=None,
                   lsd_cvlist=None,
                   lsdstd_cvlist=None,
                   cvlist_dv=None,
                   lsd_cvlist_dv=None,
                   lsdstd_cvlist_dv=None,
                   f0rmse_cvlist=None,
                   f0corr_cvlist=None,
                   caprmse_cvlist=None,
                   f0rmse_cvlist_dv=None,
                   f0corr_cvlist_dv=None,
                   caprmse_cvlist_dv=None,
                   cvlist_cyc=None,
                   lsd_cvlist_cyc=None,
                   lsdstd_cvlist_cyc=None,
                   cvlist_cyc_dv=None,
                   lsd_cvlist_cyc_dv=None,
                   lsdstd_cvlist_cyc_dv=None,
                   f0rmse_cvlist_cyc=None,
                   f0corr_cvlist_cyc=None,
                   caprmse_cvlist_cyc=None,
                   f0rmse_cvlist_cyc_dv=None,
                   f0corr_cvlist_cyc_dv=None,
                   caprmse_cvlist_cyc_dv=None):
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder_melsp = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_melsp)
                model_decoder_melsp = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim + config.lat_dim_e,
                    excit_dim=config.excit_dim,
                    out_dim=config.mel_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_dec)
                logging.info(model_decoder_melsp)
                model_encoder_excit = GRU_VAE_ENCODER(
                    in_dim=config.mel_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim_e,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_enc)
                logging.info(model_encoder_excit)
                model_decoder_excit = GRU_EXCIT_DECODER(
                    feat_dim=config.lat_dim_e,
                    cap_dim=config.cap_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_lf0,
                    hidden_units=config.hidden_units_lf0,
                    kernel_size=config.kernel_size_lf0,
                    dilation_size=config.dilation_size_lf0,
                    causal_conv=config.causal_conv_lf0,
                    bi=False,
                    ar=False,
                    pad_first=True,
                    right_size=config.right_size_lf0)
                logging.info(model_decoder_excit)
                if (config.spkidtr_dim > 0):
                    model_spkidtr = SPKID_TRANSFORM_LAYER(
                        n_spk=n_spk, spkidtr_dim=config.spkidtr_dim)
                    logging.info(model_spkidtr)
                model_encoder_melsp.load_state_dict(
                    torch.load(args.model)["model_encoder_melsp"])
                model_decoder_melsp.load_state_dict(
                    torch.load(args.model)["model_decoder_melsp"])
                model_encoder_excit.load_state_dict(
                    torch.load(args.model)["model_encoder_excit"])
                model_decoder_excit.load_state_dict(
                    torch.load(args.model)["model_decoder_excit"])
                if (config.spkidtr_dim > 0):
                    model_spkidtr.load_state_dict(
                        torch.load(args.model)["model_spkidtr"])
                model_encoder_melsp.cuda()
                model_decoder_melsp.cuda()
                model_encoder_excit.cuda()
                model_decoder_excit.cuda()
                if (config.spkidtr_dim > 0):
                    model_spkidtr.cuda()
                model_encoder_melsp.eval()
                model_decoder_melsp.eval()
                model_encoder_excit.eval()
                model_decoder_excit.eval()
                if (config.spkidtr_dim > 0):
                    model_spkidtr.eval()
                for param in model_encoder_melsp.parameters():
                    param.requires_grad = False
                for param in model_decoder_melsp.parameters():
                    param.requires_grad = False
                for param in model_encoder_excit.parameters():
                    param.requires_grad = False
                for param in model_decoder_excit.parameters():
                    param.requires_grad = False
                if (config.spkidtr_dim > 0):
                    for param in model_spkidtr.parameters():
                        param.requires_grad = False
            count = 0
            pad_left = (model_encoder_melsp.pad_left +
                        model_decoder_excit.pad_left +
                        model_decoder_melsp.pad_left) * 2
            pad_right = (model_encoder_melsp.pad_right +
                         model_decoder_excit.pad_right +
                         model_decoder_melsp.pad_right) * 2
            outpad_lefts = [None] * 5
            outpad_rights = [None] * 5
            outpad_lefts[0] = pad_left - model_encoder_melsp.pad_left
            outpad_rights[0] = pad_right - model_encoder_melsp.pad_right
            outpad_lefts[1] = outpad_lefts[0] - model_decoder_excit.pad_left
            outpad_rights[1] = outpad_rights[0] - model_decoder_excit.pad_right
            outpad_lefts[2] = outpad_lefts[1] - model_decoder_melsp.pad_left
            outpad_rights[2] = outpad_rights[1] - model_decoder_melsp.pad_right
            outpad_lefts[3] = outpad_lefts[2] - model_encoder_melsp.pad_left
            outpad_rights[3] = outpad_rights[2] - model_encoder_melsp.pad_right
            outpad_lefts[4] = outpad_lefts[3] - model_decoder_excit.pad_left
            outpad_rights[4] = outpad_rights[3] - model_decoder_excit.pad_right
            for feat_file in feat_list:
                # reconst. melsp
                logging.info("recmelsp " + feat_file)

                feat_org = read_hdf5(feat_file, "/log_1pmelmagsp")
                logging.info(feat_org.shape)

                with torch.no_grad():
                    feat = F.pad(
                        torch.FloatTensor(feat_org).cuda().unsqueeze(
                            0).transpose(1, 2), (pad_left, pad_right),
                        "replicate").transpose(1, 2)

                    spk_logits, _, lat_src, _ = model_encoder_melsp(
                        feat, sampling=False)
                    spk_logits_e, _, lat_src_e, _ = model_encoder_excit(
                        feat, sampling=False)
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:
                                                     -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:],
                                          dim=-1), 1))
                    logging.info('input spkpost_e')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[0]:
                                                       -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[0]:],
                                          dim=-1), 1))

                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_src_e.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_src_e.shape[1])) * spk_idx).cuda().long()
                    cvlf0_src, _ = model_decoder_excit(src_code, lat_src_e)

                    if model_decoder_excit.pad_right > 0:
                        lat_cat = torch.cat((
                            lat_src_e[:, model_decoder_excit.
                                      pad_left:-model_decoder_excit.pad_right],
                            lat_src[:, model_decoder_excit.
                                    pad_left:-model_decoder_excit.pad_right]),
                                            2)
                    else:
                        lat_cat = torch.cat(
                            (lat_src_e[:, model_decoder_excit.pad_left:],
                             lat_src[:, model_decoder_excit.pad_left:]), 2)
                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long()
                    cvmelsp_src, _ = model_decoder_melsp(
                        lat_cat,
                        y=src_code,
                        e=cvlf0_src[:, :, :config.excit_dim])

                    spk_logits, _, lat_rec, _ = model_encoder_melsp(
                        cvmelsp_src, sampling=False)
                    spk_logits_e, _, lat_rec_e, _ = model_encoder_excit(
                        cvmelsp_src, sampling=False)
                    logging.info('rec spkpost')
                    if outpad_rights[3] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:
                                                     -outpad_rights[3]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[3]:],
                                          dim=-1), 1))
                    logging.info('rec spkpost_e')
                    if outpad_rights[3] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[3]:
                                                       -outpad_rights[3]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits_e[:, outpad_lefts[3]:],
                                          dim=-1), 1))

                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_rec_e.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_rec_e.shape[1])) * spk_idx).cuda().long()
                    cvlf0_cyc, _ = model_decoder_excit(src_code, lat_rec_e)

                    if model_decoder_excit.pad_right > 0:
                        lat_cat = torch.cat((
                            lat_rec_e[:, model_decoder_excit.
                                      pad_left:-model_decoder_excit.pad_right],
                            lat_rec[:, model_decoder_excit.
                                    pad_left:-model_decoder_excit.pad_right]),
                                            2)
                    else:
                        lat_cat = torch.cat(
                            (lat_rec_e[:, model_decoder_excit.pad_left:],
                             lat_rec[:, model_decoder_excit.pad_left:]), 2)
                    if config.spkidtr_dim > 0:
                        src_code = model_spkidtr((torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long())
                    else:
                        src_code = (torch.ones(
                            (1, lat_cat.shape[1])) * spk_idx).cuda().long()
                    cvmelsp_cyc, _ = model_decoder_melsp(
                        lat_cat,
                        y=src_code,
                        e=cvlf0_cyc[:, :, :config.excit_dim])

                    if outpad_rights[1] > 0:
                        cvlf0_src = cvlf0_src[:, outpad_lefts[1]:
                                              -outpad_rights[1]]
                    else:
                        cvlf0_src = cvlf0_src[:, outpad_lefts[1]:]
                    if outpad_rights[2] > 0:
                        cvmelsp_src = cvmelsp_src[:, outpad_lefts[2]:
                                                  -outpad_rights[2]]
                    else:
                        cvmelsp_src = cvmelsp_src[:, outpad_lefts[2]:]
                    if outpad_rights[4] > 0:
                        cvlf0_cyc = cvlf0_cyc[:, outpad_lefts[4]:
                                              -outpad_rights[4]]
                    else:
                        cvlf0_cyc = cvlf0_cyc[:, outpad_lefts[4]:]

                    feat_rec = cvmelsp_src[0].cpu().data.numpy()
                    feat_cyc = cvmelsp_cyc[0].cpu().data.numpy()

                    cvmelsp_src = np.array(cvmelsp_src[0].cpu().data.numpy(),
                                           dtype=np.float64)
                    cvlf0_src = np.array(cvlf0_src[0].cpu().data.numpy(),
                                         dtype=np.float64)

                    cvmelsp_cyc = np.array(cvmelsp_cyc[0].cpu().data.numpy(),
                                           dtype=np.float64)
                    cvlf0_cyc = np.array(cvlf0_cyc[0].cpu().data.numpy(),
                                         dtype=np.float64)

                logging.info(cvlf0_src.shape)
                logging.info(cvmelsp_src.shape)

                logging.info(cvlf0_cyc.shape)
                logging.info(cvmelsp_cyc.shape)

                melsp = np.array(feat_org)

                feat_world = read_hdf5(feat_file, "/feat_mceplf0cap")
                f0 = np.array(
                    np.rint(feat_world[:, 0]) * np.exp(feat_world[:, 1]))
                codeap = np.array(
                    np.rint(feat_world[:, 2:3]) *
                    (-np.exp(feat_world[:, 3:config.full_excit_dim])))

                cvf0_src = np.array(
                    np.rint(cvlf0_src[:, 0]) * np.exp(cvlf0_src[:, 1]))
                cvcodeap_src = np.array(
                    np.rint(cvlf0_src[:, 2:3]) * (-np.exp(cvlf0_src[:, 3:])))
                f0_rmse = np.sqrt(np.mean((cvf0_src - f0)**2))
                logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse))
                cvf0_src_mean = np.mean(cvf0_src)
                f0_mean = np.mean(f0)
                f0_corr = np.sum((cvf0_src-cvf0_src_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_src-cvf0_src_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_rec: %lf' % (f0_corr))

                codeap_rmse = np.sqrt(
                    np.mean((cvcodeap_src - codeap)**2, axis=0))
                for i in range(codeap_rmse.shape[-1]):
                    logging.info('codeap-%d_rmse_rec: %lf dB' %
                                 (i + 1, codeap_rmse[i]))

                cvf0_cyc = np.array(
                    np.rint(cvlf0_cyc[:, 0]) * np.exp(cvlf0_cyc[:, 1]))
                cvcodeap_cyc = np.array(
                    np.rint(cvlf0_cyc[:, 2:3]) * (-np.exp(cvlf0_cyc[:, 3:])))
                f0_rmse_cyc = np.sqrt(np.mean((cvf0_cyc - f0)**2))
                logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc))
                cvf0_cyc_mean = np.mean(cvf0_cyc)
                f0_mean = np.mean(f0)
                f0_corr_cyc = np.sum((cvf0_cyc-cvf0_cyc_mean)*(f0-f0_mean))/\
                            (np.sqrt(np.sum((cvf0_cyc-cvf0_cyc_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2)))
                logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc))

                codeap_rmse_cyc = np.sqrt(
                    np.mean((cvcodeap_cyc - codeap)**2, axis=0))
                for i in range(codeap_rmse_cyc.shape[-1]):
                    logging.info('codeap-%d_rmse_cyc: %lf dB' %
                                 (i + 1, codeap_rmse_cyc[i]))

                spcidx = np.array(read_hdf5(feat_file, "/spcidx_range")[0])

                melsp_rest = (np.exp(melsp) - 1) / 10000
                melsp_src_rest = (np.exp(cvmelsp_src) - 1) / 10000
                melsp_cyc_rest = (np.exp(cvmelsp_cyc) - 1) / 10000

                lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_src_rest[spcidx], a_min=1e-16, a_max=None))\
                                                         -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1))
                lsd_mean = np.mean(lsd_arr)
                lsd_std = np.std(lsd_arr)
                logging.info("lsd_rec: %.6f dB +- %.6f" % (lsd_mean, lsd_std))

                lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_cyc_rest[spcidx], a_min=1e-16, a_max=None))\
                                                         -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1))
                lsd_mean_cyc = np.mean(lsd_arr)
                lsd_std_cyc = np.std(lsd_arr)
                logging.info("lsd_cyc: %.6f dB +- %.6f" %
                             (lsd_mean_cyc, lsd_std_cyc))

                logging.info('org f0')
                logging.info(f0[10:15])
                logging.info('rec f0')
                logging.info(cvf0_src[10:15])
                logging.info('cyc f0')
                logging.info(cvf0_cyc[10:15])
                logging.info('org cap')
                logging.info(codeap[10:15])
                logging.info('rec cap')
                logging.info(cvcodeap_src[10:15])
                logging.info('cyc cap')
                logging.info(cvcodeap_cyc[10:15])

                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    f0rmse_cvlist.append(f0_rmse)
                    f0corr_cvlist.append(f0_corr)
                    caprmse_cvlist.append(codeap_rmse)
                    lsd_cvlist.append(lsd_mean)
                    lsdstd_cvlist.append(lsd_std)
                    cvlist.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist))
                    f0rmse_cvlist_cyc.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc.append(f0_corr_cyc)
                    caprmse_cvlist_cyc.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc.append(lsd_std_cyc)
                    cvlist_cyc.append(np.var(melsp_cyc_rest, axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    f0rmse_cvlist_dv.append(f0_rmse)
                    f0corr_cvlist_dv.append(f0_corr)
                    caprmse_cvlist_dv.append(codeap_rmse)
                    lsd_cvlist_dv.append(lsd_mean)
                    lsdstd_cvlist_dv.append(lsd_std)
                    cvlist_dv.append(np.var(melsp_src_rest, axis=0))
                    logging.info(len(cvlist_dv))
                    f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc)
                    f0corr_cvlist_cyc_dv.append(f0_corr_cyc)
                    caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc)
                    lsd_cvlist_cyc_dv.append(lsd_mean_cyc)
                    lsdstd_cvlist_cyc_dv.append(lsd_std_cyc)
                    cvlist_cyc_dv.append(np.var(melsp_cyc_rest, axis=0))

                logging.info('write rec to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1
Example #29
0
def main():
    """Reconstruct (and cyclically re-reconstruct) mel-cepstra of one speaker
    with a trained VQ-VAE model, score the results, and store the
    reconstructed features in HDF5.

    For every utterance the script computes mel-cepstral distortion with and
    without the power coefficient (mcdpow/mcd) for both the direct
    reconstruction ("rec") and the cyclic reconstruction ("cyc"), and writes
    the reconstructed feature matrices next to the originals under
    "<spk>-<spk>" and "<spk>-<spk>-<spk>" directories.  Decoding is fanned
    out over --n_gpus worker processes; per-utterance statistics are gathered
    in multiprocessing.Manager lists and summarized once all workers join.
    """
    parser = argparse.ArgumentParser()
    # decode setting
    parser.add_argument("--feats",
                        required=True,
                        type=str,
                        help="list or directory of source eval feat files")
    parser.add_argument("--spk",
                        required=True,
                        type=str,
                        help="speaker name to be reconstructed")
    parser.add_argument("--model", required=True, type=str, help="model file")
    parser.add_argument("--config",
                        required=True,
                        type=str,
                        help="configure file")
    parser.add_argument("--n_gpus", default=1, type=int, help="number of gpus")
    parser.add_argument("--outdir",
                        required=True,
                        type=str,
                        help="directory to save log")
    parser.add_argument("--string_path",
                        required=True,
                        type=str,
                        help="path of h5 generated feature")
    # other setting
    parser.add_argument("--GPU_device",
                        default=None,
                        type=int,
                        help="selection of GPU device")
    parser.add_argument("--GPU_device_str",
                        default=None,
                        type=str,
                        help="selection of GPU device")
    parser.add_argument("--verbose", default=1, type=int, help="log level")

    args = parser.parse_args()

    # restrict the visible CUDA devices before any CUDA context is created
    if args.GPU_device is not None or args.GPU_device_str is not None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        if args.GPU_device_str is None:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(args.GPU_device)
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU_device_str

    # check directory existence
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # set log level
    # BUGFIX: the INFO branch previously tested `args.verbose > 0`, which also
    # matched verbose > 1 and made the DEBUG branch unreachable; test == 1 so
    # that --verbose 2 actually enables DEBUG logging (same pattern as the
    # other entry points in this file).
    if args.verbose == 1:
        logging.basicConfig(
            level=logging.INFO,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
    elif args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
    else:
        logging.basicConfig(
            level=logging.WARN,
            format=
            '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S',
            filemode='w',
            filename=args.outdir + "/decode.log")
        logging.getLogger().addHandler(logging.StreamHandler())
        logging.warning("logging is disabled.")

    # load config (a pickled namespace saved with torch.save at training time)
    config = torch.load(args.config)

    # get source feat list
    if os.path.isdir(args.feats):
        feat_list = sorted(find_files(args.feats, "*.h5"))
    elif os.path.isfile(args.feats):
        feat_list = read_txt(args.feats)
    else:
        logging.error("--feats should be directory or list.")
        sys.exit(1)

    # prepare the file list for parallel decoding
    feat_lists = np.array_split(feat_list, args.n_gpus)
    feat_lists = [f_list.tolist() for f_list in feat_lists]
    for i in range(args.n_gpus):
        logging.info('%d: %d' % (i + 1, len(feat_lists[i])))

    spk_list = config.spk_list.split('@')
    n_spk = len(spk_list)
    spk_idx = spk_list.index(args.spk)

    stats_list = config.stats_list.split('@')
    assert (n_spk == len(stats_list))

    # reference global-variance mean of the target speaker (power excluded)
    spk_stat = stats_list[spk_idx]
    gv_mean = read_hdf5(spk_stat, "/gv_range_mean")[1:]

    model_epoch = os.path.basename(args.model).split('.')[0].split('-')[1]
    logging.info('epoch: ' + model_epoch)

    model_name = os.path.basename(os.path.dirname(args.model)).split('_')[1]
    logging.info('mdl_name: ' + model_name)

    logging.info(config)

    # define gpu decode function
    def gpu_decode(feat_list, gpu, cvlist=None, mcdpow_cvlist=None, mcdpowstd_cvlist=None, mcd_cvlist=None, \
                    mcdstd_cvlist=None, cvlist_dv=None, mcdpow_cvlist_dv=None, mcdpowstd_cvlist_dv=None, \
                    mcd_cvlist_dv=None, mcdstd_cvlist_dv=None, \
                    cvlist_cyc=None, mcdpow_cvlist_cyc=None, mcdpowstd_cvlist_cyc=None, mcd_cvlist_cyc=None, \
                    mcdstd_cvlist_cyc=None, cvlist_cyc_dv=None, mcdpow_cvlist_cyc_dv=None, mcdpowstd_cvlist_cyc_dv=None, \
                    mcd_cvlist_cyc_dv=None, mcdstd_cvlist_cyc_dv=None):
        """Decode `feat_list` on GPU `gpu`, appending per-utterance scores to
        the shared manager lists (trn lists for training files, *_dv for dev)."""
        with torch.cuda.device(gpu):
            # define model and load parameters
            with torch.no_grad():
                model_encoder = GRU_VAE_ENCODER(
                    in_dim=config.mcep_dim + config.excit_dim,
                    n_spk=n_spk,
                    lat_dim=config.lat_dim,
                    hidden_layers=config.hidden_layers_enc,
                    hidden_units=config.hidden_units_enc,
                    kernel_size=config.kernel_size_enc,
                    dilation_size=config.dilation_size_enc,
                    causal_conv=config.causal_conv_enc,
                    bi=config.bi_enc,
                    cont=False,
                    pad_first=True,
                    right_size=config.right_size,
                    ar=config.ar_enc)
                logging.info(model_encoder)
                model_decoder = GRU_SPEC_DECODER(
                    feat_dim=config.lat_dim,
                    out_dim=config.mcep_dim,
                    n_spk=n_spk,
                    hidden_layers=config.hidden_layers_dec,
                    hidden_units=config.hidden_units_dec,
                    kernel_size=config.kernel_size_dec,
                    dilation_size=config.dilation_size_dec,
                    causal_conv=config.causal_conv_dec,
                    bi=config.bi_dec,
                    pad_first=True,
                    ar=config.ar_dec)
                logging.info(model_decoder)
                model_vq = torch.nn.Embedding(config.ctr_size, config.lat_dim)
                logging.info(model_vq)
                # deserialize the checkpoint once instead of once per submodule
                checkpoint = torch.load(args.model)
                model_encoder.load_state_dict(checkpoint["model_encoder"])
                model_decoder.load_state_dict(checkpoint["model_decoder"])
                model_vq.load_state_dict(checkpoint["model_vq"])
                model_encoder.cuda()
                model_decoder.cuda()
                model_vq.cuda()
                model_encoder.eval()
                model_decoder.eval()
                model_vq.eval()
                for param in model_encoder.parameters():
                    param.requires_grad = False
                for param in model_decoder.parameters():
                    param.requires_grad = False
                for param in model_vq.parameters():
                    param.requires_grad = False
                if config.ar_enc:
                    # initial autoregressive input for the encoder
                    yz_in = torch.zeros((1, 1, n_spk + config.lat_dim)).cuda()
                if config.ar_dec:
                    # initial autoregressive input for the decoder: the
                    # normalized all-zero mel-cepstrum frame
                    mean_stats = torch.FloatTensor(
                        read_hdf5(
                            config.stats,
                            "/mean_" + config.string_path.replace("/", "")))
                    scale_stats = torch.FloatTensor(
                        read_hdf5(
                            config.stats,
                            "/scale_" + config.string_path.replace("/", "")))
                    x_in = ((torch.zeros((1, 1, config.mcep_dim)) -
                             mean_stats[config.excit_dim:]) /
                            scale_stats[config.excit_dim:]).cuda()
            count = 0
            # total replication padding covers two encoder+decoder passes;
            # outpad_* track the residual padding left after each stage so
            # that intermediate outputs can be cropped back to utterance length
            pad_left = (model_encoder.pad_left + model_decoder.pad_left) * 2
            pad_right = (model_encoder.pad_right + model_decoder.pad_right) * 2
            outpad_lefts = [None] * 3
            outpad_rights = [None] * 3
            outpad_lefts[0] = pad_left - model_encoder.pad_left
            outpad_rights[0] = pad_right - model_encoder.pad_right
            outpad_lefts[1] = outpad_lefts[0] - model_decoder.pad_left
            outpad_rights[1] = outpad_rights[0] - model_decoder.pad_right
            outpad_lefts[2] = outpad_lefts[1] - model_encoder.pad_left
            outpad_rights[2] = outpad_rights[1] - model_encoder.pad_right
            for feat_file in feat_list:
                # convert mcep
                logging.info("recmcep " + feat_file)

                feat_org = read_hdf5(feat_file, "/feat_mceplf0cap")
                logging.info(feat_org.shape)
                mcep = np.array(feat_org[:, -model_decoder.out_dim:])

                with torch.no_grad():
                    feat = torch.FloatTensor(feat_org).cuda().unsqueeze(0)
                    feat_excit = feat[:, :, :config.excit_dim]

                    # 1st pass: encode, quantize, decode -> reconstruction
                    if config.ar_enc:
                        spk_logits, lat_src, _, _ = model_encoder(F.pad(feat.transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2), \
                                                            yz_in=yz_in)
                    else:
                        spk_logits, lat_src, _ = model_encoder(
                            F.pad(feat.transpose(1, 2), (pad_left, pad_right),
                                  "replicate").transpose(1, 2))
                    idx_vq = nn_search_batch(lat_src, model_vq.weight)
                    lat_src = model_vq(idx_vq)
                    if outpad_rights[0] > 0:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[0]:-outpad_rights[0]].cpu(
                            ).data.numpy(),
                            return_counts=True)
                    else:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[0]:].cpu().data.numpy(),
                            return_counts=True)
                    logging.info("input vq")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('input spkpost')
                    if outpad_rights[0] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:
                                                     -outpad_rights[0]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[0]:],
                                          dim=-1), 1))

                    src_code = (torch.ones(
                        (1, lat_src.shape[1])) * spk_idx).cuda().long()
                    if config.ar_dec:
                        cvmcep_src, _, _ = model_decoder(src_code,
                                                         lat_src,
                                                         x_in=x_in)
                    else:
                        cvmcep_src, _ = model_decoder(src_code, lat_src)

                    # 2nd pass: re-encode the reconstruction -> cyclic output
                    if config.ar_enc:
                        spk_logits, lat_rec, _, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2), cvmcep_src), 2),
                                                            yz_in=yz_in)
                    else:
                        spk_logits, lat_rec, _ = model_encoder(torch.cat((F.pad(feat_excit.transpose(1,2), \
                                            (outpad_lefts[1],outpad_rights[1]), "replicate").transpose(1,2), cvmcep_src), 2))
                    idx_vq = nn_search_batch(lat_rec, model_vq.weight)
                    lat_rec = model_vq(idx_vq)
                    if outpad_rights[2] > 0:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[2]:-outpad_rights[2]].cpu(
                            ).data.numpy(),
                            return_counts=True)
                    else:
                        unique, counts = np.unique(
                            idx_vq[:, outpad_lefts[2]:].cpu().data.numpy(),
                            return_counts=True)
                    logging.info("rec vq")
                    logging.info(dict(zip(unique, counts)))
                    logging.info('rec spkpost')
                    if outpad_rights[2] > 0:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[2]:
                                                     -outpad_rights[2]],
                                          dim=-1), 1))
                    else:
                        logging.info(
                            torch.mean(
                                F.softmax(spk_logits[:, outpad_lefts[2]:],
                                          dim=-1), 1))

                    src_code = (torch.ones(
                        (1, lat_rec.shape[1])) * spk_idx).cuda().long()
                    if config.ar_dec:
                        cvmcep_cyc, _, _ = model_decoder(src_code,
                                                         lat_rec,
                                                         x_in=x_in)
                    else:
                        cvmcep_cyc, _ = model_decoder(src_code, lat_rec)

                    # crop the reconstruction back to the utterance length
                    if outpad_rights[1] > 0:
                        cvmcep_src = cvmcep_src[:, outpad_lefts[1]:
                                                -outpad_rights[1]]
                    else:
                        cvmcep_src = cvmcep_src[:, outpad_lefts[1]:]

                    feat_rec = torch.cat((feat_excit, cvmcep_src),
                                         2)[0].cpu().data.numpy()
                    feat_cyc = torch.cat((feat_excit, cvmcep_cyc),
                                         2)[0].cpu().data.numpy()

                    cvmcep_src = np.array(cvmcep_src[0].cpu().data.numpy(),
                                          dtype=np.float64)
                    cvmcep_cyc = np.array(cvmcep_cyc[0].cpu().data.numpy(),
                                          dtype=np.float64)

                logging.info(cvmcep_src.shape)
                logging.info(cvmcep_cyc.shape)

                # score only speech frames; DTW aligns converted vs. original
                spcidx = read_hdf5(feat_file, "/spcidx_range")[0]

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_src[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean = np.mean(mcdpow_arr)
                mcdpow_std = np.std(mcdpow_arr)
                mcd_mean = np.mean(mcd_arr)
                mcd_std = np.std(mcd_arr)
                logging.info("mcdpow_rec: %.6f dB +- %.6f" %
                             (mcdpow_mean, mcdpow_std))
                logging.info("mcd_rec: %.6f dB +- %.6f" % (mcd_mean, mcd_std))

                _, _, _, mcdpow_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),:], dtype=np.float64))
                _, _, _, mcd_arr = dtw.dtw_org_to_trg(np.array(cvmcep_cyc[np.array(spcidx),1:], \
                                            dtype=np.float64), np.array(mcep[np.array(spcidx),1:], dtype=np.float64))
                mcdpow_mean_cyc = np.mean(mcdpow_arr)
                mcdpow_std_cyc = np.std(mcdpow_arr)
                mcd_mean_cyc = np.mean(mcd_arr)
                mcd_std_cyc = np.std(mcd_arr)
                logging.info("mcdpow_cyc: %.6f dB +- %.6f" %
                             (mcdpow_mean_cyc, mcdpow_std_cyc))
                logging.info("mcd_cyc: %.6f dB +- %.6f" %
                             (mcd_mean_cyc, mcd_std_cyc))

                # NOTE(review): assumes the 2nd path component encodes the
                # dataset split (tr/dv) — verify against the data layout
                dataset = feat_file.split('/')[1].split('_')[0]
                if 'tr' in dataset:
                    logging.info('trn')
                    # BUGFIX: mcdpow_mean / mcdpow_mean_cyc used to be
                    # appended twice, double-counting each utterance in the
                    # summary statistics; append each score exactly once.
                    mcdpow_cvlist.append(mcdpow_mean)
                    mcdpowstd_cvlist.append(mcdpow_std)
                    mcd_cvlist.append(mcd_mean)
                    mcdstd_cvlist.append(mcd_std)
                    cvlist.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist))
                    mcdpow_cvlist_cyc.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc.append(mcd_std_cyc)
                    cvlist_cyc.append(np.var(cvmcep_cyc[:, 1:], axis=0))
                elif 'dv' in dataset:
                    logging.info('dev')
                    mcdpow_cvlist_dv.append(mcdpow_mean)
                    mcdpowstd_cvlist_dv.append(mcdpow_std)
                    mcd_cvlist_dv.append(mcd_mean)
                    mcdstd_cvlist_dv.append(mcd_std)
                    cvlist_dv.append(np.var(cvmcep_src[:, 1:], axis=0))
                    logging.info(len(cvlist_dv))
                    mcdpow_cvlist_cyc_dv.append(mcdpow_mean_cyc)
                    mcdpowstd_cvlist_cyc_dv.append(mcdpow_std_cyc)
                    mcd_cvlist_cyc_dv.append(mcd_mean_cyc)
                    mcdstd_cvlist_cyc_dv.append(mcd_std_cyc)
                    cvlist_cyc_dv.append(np.var(cvmcep_cyc[:, 1:], axis=0))

                logging.info('write rec to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_rec.shape)
                write_hdf5(feat_file, args.string_path, feat_rec)

                logging.info('write cyc to h5')
                outh5dir = os.path.join(
                    os.path.dirname(os.path.dirname(feat_file)),
                    args.spk + "-" + args.spk + "-" + args.spk)
                if not os.path.exists(outh5dir):
                    os.makedirs(outh5dir)
                feat_file = os.path.join(outh5dir, os.path.basename(feat_file))
                logging.info(feat_file + ' ' + args.string_path)
                logging.info(feat_cyc.shape)
                write_hdf5(feat_file, args.string_path, feat_cyc)

                count += 1

    # parallel decode training: one process per GPU chunk, shared lists
    # collected through a Manager so the parent can summarize afterwards
    with mp.Manager() as manager:
        gpu = 0
        processes = []
        cvlist = manager.list()
        mcd_cvlist = manager.list()
        mcdstd_cvlist = manager.list()
        mcdpow_cvlist = manager.list()
        mcdpowstd_cvlist = manager.list()
        cvlist_dv = manager.list()
        mcd_cvlist_dv = manager.list()
        mcdstd_cvlist_dv = manager.list()
        mcdpow_cvlist_dv = manager.list()
        mcdpowstd_cvlist_dv = manager.list()
        cvlist_cyc = manager.list()
        mcd_cvlist_cyc = manager.list()
        mcdstd_cvlist_cyc = manager.list()
        mcdpow_cvlist_cyc = manager.list()
        mcdpowstd_cvlist_cyc = manager.list()
        cvlist_cyc_dv = manager.list()
        mcd_cvlist_cyc_dv = manager.list()
        mcdstd_cvlist_cyc_dv = manager.list()
        mcdpow_cvlist_cyc_dv = manager.list()
        mcdpowstd_cvlist_cyc_dv = manager.list()
        for i, feat_list in enumerate(feat_lists):
            logging.info(i)
            p = mp.Process(target=gpu_decode, args=(feat_list, gpu, cvlist, mcdpow_cvlist, mcdpowstd_cvlist, \
                                                    mcd_cvlist, mcdstd_cvlist, cvlist_dv, mcdpow_cvlist_dv, \
                                                    mcdpowstd_cvlist_dv, mcd_cvlist_dv, mcdstd_cvlist_dv,\
                                                    cvlist_cyc, mcdpow_cvlist_cyc, mcdpowstd_cvlist_cyc, \
                                                    mcd_cvlist_cyc, mcdstd_cvlist_cyc, cvlist_cyc_dv, mcdpow_cvlist_cyc_dv, \
                                                    mcdpowstd_cvlist_cyc_dv, mcd_cvlist_cyc_dv, mcdstd_cvlist_cyc_dv,))
            p.start()
            processes.append(p)
            gpu += 1
            if (i + 1) % args.n_gpus == 0:
                gpu = 0
        # wait for all process
        for p in processes:
            p.join()

        # calculate cv_gv statistics
        if len(mcdpow_cvlist) > 0:
            logging.info("mcdpow_rec: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcdpow_cvlist)), \
                        np.std(np.array(mcdpow_cvlist)),np.mean(np.array(mcdpowstd_cvlist)),\
                        np.std(np.array(mcdpowstd_cvlist))))
            logging.info("mcd_rec: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcd_cvlist)), \
                        np.std(np.array(mcd_cvlist)),np.mean(np.array(mcdstd_cvlist)),\
                        np.std(np.array(mcdstd_cvlist))))
            # RMS of log-GV distance between converted and reference GV
            cvgv_mean = np.mean(np.array(cvlist), axis=0)
            cvgv_var = np.var(np.array(cvlist), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("mcdpow_cyc: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcdpow_cvlist_cyc)), \
                        np.std(np.array(mcdpow_cvlist_cyc)),np.mean(np.array(mcdpowstd_cvlist_cyc)),\
                        np.std(np.array(mcdpowstd_cvlist_cyc))))
            logging.info("mcd_cyc: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcd_cvlist_cyc)), \
                        np.std(np.array(mcd_cvlist_cyc)),np.mean(np.array(mcdstd_cvlist_cyc)),\
                        np.std(np.array(mcdstd_cvlist_cyc))))
            cvgv_mean = np.mean(np.array(cvlist_cyc), axis=0)
            cvgv_var = np.var(np.array(cvlist_cyc), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))

            # combined rec+cyc GV statistics are the ones stored back to the
            # speaker stats file under a model-specific key
            cvgv_mean = np.mean(np.array(np.r_[cvlist, cvlist_cyc]), axis=0)
            cvgv_var = np.var(np.array(np.r_[cvlist, cvlist_cyc]), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))

            string_path = model_name + "-" + str(config.detach) + "-" + str(
                config.n_half_cyc) + "-" + str(config.lat_dim) + "-" + str(
                    config.ctr_size) + "-" + str(config.ar_enc) + "-" + str(
                        config.ar_dec) + "-" + model_epoch
            logging.info(string_path)

            string_mean = "/recgv_mean_" + string_path
            string_var = "/recgv_var_" + string_path
            write_hdf5(spk_stat, string_mean, cvgv_mean)
            write_hdf5(spk_stat, string_var, cvgv_var)

        if len(mcdpow_cvlist_dv) > 0:
            logging.info("mcdpow_rec_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcdpow_cvlist_dv)), \
                        np.std(np.array(mcdpow_cvlist_dv)),np.mean(np.array(mcdpowstd_cvlist_dv)),\
                        np.std(np.array(mcdpowstd_cvlist_dv))))
            logging.info("mcd_rec_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcd_cvlist_dv)), \
                        np.std(np.array(mcd_cvlist_dv)),np.mean(np.array(mcdstd_cvlist_dv)),\
                        np.std(np.array(mcdstd_cvlist_dv))))
            cvgv_mean = np.mean(np.array(cvlist_dv), axis=0)
            cvgv_var = np.var(np.array(cvlist_dv), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
            logging.info("mcdpow_cyc_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcdpow_cvlist_cyc_dv)), \
                        np.std(np.array(mcdpow_cvlist_cyc_dv)),np.mean(np.array(mcdpowstd_cvlist_cyc_dv)),\
                        np.std(np.array(mcdpowstd_cvlist_cyc_dv))))
            logging.info("mcd_cyc_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(mcd_cvlist_cyc_dv)), \
                        np.std(np.array(mcd_cvlist_cyc_dv)),np.mean(np.array(mcdstd_cvlist_cyc_dv)),\
                        np.std(np.array(mcdstd_cvlist_cyc_dv))))
            cvgv_mean = np.mean(np.array(cvlist_cyc_dv), axis=0)
            cvgv_var = np.var(np.array(cvlist_cyc_dv), axis=0)
            logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \
                                        np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean))))))
def _fit_feat_scalers(list_file, scalers, label):
    """Partial-fit every scaler in *scalers* on "/feat_org_lf0" of each utterance.

    Args:
        list_file (str): path to a text file listing hdf5 feature files.
        scalers (list): StandardScaler instances to update in place.
        label (str): human-readable name of the list, used in the log message.
    """
    filenames = read_txt(list_file)
    logging.info("number of %s training utterances = %d", label, len(filenames))
    for filename in filenames:
        feat_org_lf0 = read_hdf5(filename, "/feat_org_lf0")
        for scaler in scalers:
            scaler.partial_fit(feat_org_lf0[:, :])


def main():
    """Compute mean/scale statistics of "/feat_org_lf0" over training lists.

    Accumulates a StandardScaler over the source and target lists jointly and
    writes the resulting mean/scale vectors to --stats. When --feats_trg_all
    is given, additionally accumulates target-only statistics (over the target
    and target-all lists) and writes them to --stats_trg.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--feats_src",
                        default=None,
                        required=True,
                        help="name of the list of hdf5 files")
    parser.add_argument("--feats_trg",
                        default=None,
                        required=True,
                        help="name of the list of hdf5 files")
    parser.add_argument("--feats_trg_all",
                        default=None,
                        help="name of the list of hdf5 files")
    parser.add_argument("--stats",
                        default=None,
                        required=True,
                        help="filename of hdf5 format")
    parser.add_argument("--stats_trg",
                        default=None,
                        help="filename of hdf5 format")
    parser.add_argument("--expdir",
                        required=True,
                        type=str,
                        help="directory to save the log")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")

    args = parser.parse_args()

    # --stats_trg is mandatory whenever --feats_trg_all is given; fail fast
    # here instead of crashing in write_hdf5(None, ...) after all the fitting.
    if args.feats_trg_all is not None and args.stats_trg is None:
        parser.error("--stats_trg is required when --feats_trg_all is given")

    # set log level (the three branches differed only in the level constant)
    if args.verbose == 1:
        level = logging.INFO
    elif args.verbose > 1:
        level = logging.DEBUG
    else:
        level = logging.WARN
    logging.basicConfig(
        level=level,
        format=
        '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S',
        filename=args.expdir + "/calc_stats.log")
    logging.getLogger().addHandler(logging.StreamHandler())
    if args.verbose < 1:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("logging is disabled.")

    # define scalers: joint (src + trg [+ trg_all]) and optional target-only
    scaler_feat_org_lf0_jnt = StandardScaler()
    scaler_feat_org_lf0_trg_jnt = \
        StandardScaler() if args.feats_trg_all is not None else None

    # accumulate statistics over the lists
    _fit_feat_scalers(args.feats_src, [scaler_feat_org_lf0_jnt], "source")
    if scaler_feat_org_lf0_trg_jnt is None:
        trg_scalers = [scaler_feat_org_lf0_jnt]
    else:
        # target utterances contribute to both the joint and target-only stats
        trg_scalers = [scaler_feat_org_lf0_jnt, scaler_feat_org_lf0_trg_jnt]
    _fit_feat_scalers(args.feats_trg, trg_scalers, "target")
    if args.feats_trg_all is not None:
        _fit_feat_scalers(args.feats_trg_all,
                          [scaler_feat_org_lf0_jnt,
                           scaler_feat_org_lf0_trg_jnt], "target all")

    mean_feat_org_lf0_jnt = scaler_feat_org_lf0_jnt.mean_
    scale_feat_org_lf0_jnt = scaler_feat_org_lf0_jnt.scale_

    # log (instead of print) so the values also land in calc_stats.log
    logging.info("%s", mean_feat_org_lf0_jnt)
    logging.info("%s", scale_feat_org_lf0_jnt)

    # write to hdf5
    write_hdf5(args.stats, "/mean_feat_org_lf0_jnt", mean_feat_org_lf0_jnt)
    write_hdf5(args.stats, "/scale_feat_org_lf0_jnt", scale_feat_org_lf0_jnt)
    if scaler_feat_org_lf0_trg_jnt is not None:
        mean_feat_org_lf0_trg_jnt = scaler_feat_org_lf0_trg_jnt.mean_
        scale_feat_org_lf0_trg_jnt = scaler_feat_org_lf0_trg_jnt.scale_
        logging.info("%s", mean_feat_org_lf0_trg_jnt)
        logging.info("%s", scale_feat_org_lf0_trg_jnt)
        write_hdf5(args.stats_trg, "/mean_feat_org_lf0_trg_jnt",
                   mean_feat_org_lf0_trg_jnt)
        write_hdf5(args.stats_trg, "/scale_feat_org_lf0_trg_jnt",
                   scale_feat_org_lf0_trg_jnt)