def main():
    """Convert the test utterances of the source speaker and save the results.

    Steps, as performed below:

    1. Read the (mean, var) statistics of ``mgc`` and ``lf0`` for the ``src``
       and ``tgt`` groups from ``<prjdir>/data/stats.h5``.
    2. Load the pre-trained generator from ``<outdir>/g.model`` (moved to the
       GPU when ``args.gpu >= 0``).
    3. For every entry listed in ``<prjdir>/data/scp/test.scp``:

       * mgc: columns 1.. are passed through the generator, de-normalised
         with the target statistics and post-filtered with global-variance
         compensation; column 0 is copied from the source unchanged.
       * lf0: mapped with ``f0map_linear`` using the source/target mean and
         standard deviation.
       * bap: copied unchanged.

    Outputs are written to ``<outdir>/mgc``, ``<outdir>/bap`` and
    ``<outdir>/f0``.
    """
    args = parse_args()
    data_dir = os.path.join(args.prjdir, 'data')
    logging.basicConfig(format="[%(asctime)s] %(message)s",
                        level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    # Passed as ``dfmt`` to the readers below.
    # NOTE(review): presumably False selects binary float input -- confirm.
    dbl_fmt = False

    for d in ('mgc', 'f0', 'bap'):
        utils.mkdir(os.path.join(args.outdir, d))

    # statistics
    # ``dataset[()]`` replaces the ``Dataset.value`` accessor, which no longer
    # exists in h5py >= 3.0; the ``with`` block closes the file even when a
    # key is missing.
    with h5py.File(os.path.join(data_dir, 'stats.h5'), 'r') as stats:

        def mean_var(speaker, feat):
            group = stats[speaker][feat]
            return group['mean'][()], group['var'][()]

        norm_src_mgc = mean_var('src', 'mgc')
        norm_tgt_mgc = mean_var('tgt', 'mgc')
        norm_src_lf0 = mean_var('src', 'lf0')
        norm_tgt_lf0 = mean_var('tgt', 'lf0')

    # list of test data
    test_scp = os.path.join(data_dir, 'scp', 'test.scp')
    with open(test_scp) as scp:
        utterances = [line.rstrip("\n") for line in scp]

    # window for delta
    wins = [[1.0], [-0.5, 0.0, 0.5]]

    # model definition
    g = Generator(args.ndim_mgc * 2, args.nhid, args.ndim_mgc * 2)
    logger.info('loading pre-trained generator model from %s', args.outdir)
    serializers.load_hdf5(os.path.join(args.outdir, 'g.model'), g)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        g.to_gpu()

    # global variance post filtering: both values are the same for every
    # utterance, so they are set up once here.
    gv_nat = norm_tgt_mgc[1][:args.ndim_mgc]
    alpha = 1.0  # emphasis coefficient (0 < alpha <= 1.0)

    for fn_data in utterances:
        print(fn_data)
        # read data
        # mgc -> converted by using trained DNNs
        mgc_src = io.readmatrix(
            os.path.join(data_dir, 'mgc', args.src, fn_data) + '.mgc',
            dim=args.ndim_mgc + 1,
            dfmt=dbl_fmt)
        pow_src = copy.copy(mgc_src[:, 0])  # 0th mgc is not converted

        # lf0 -> linearly converted by using statistics of src & tgt lf0
        lf0_src = io.readvector(
            os.path.join(data_dir, 'lf0', args.src, fn_data) + '.lf0',
            dfmt=dbl_fmt)
        # the statistics file stores variances; f0map_linear is given their
        # square roots
        f0_gen = f0map_linear(lf0_src, norm_src_lf0[0],
                              np.sqrt(norm_src_lf0[1]), norm_tgt_lf0[0],
                              np.sqrt(norm_tgt_lf0[1]))

        # bap -> not converted
        bap_src = io.readmatrix(
            os.path.join(data_dir, 'bap', args.src, fn_data) + '.bap',
            dim=args.ndim_bap,
            dfmt=dbl_fmt)

        # predict mgc
        x = apply_delta_win(mgc_src[:, 1:], wins)
        R, W = mlpg_test.mlpg_from_pdf(x, wins)
        R = R.toarray()
        x = utils.normalize_gauss(x, norm_src_mgc[0],
                                  norm_src_mgc[1]).astype(np.float32)
        if args.gpu >= 0:
            x = cuda.to_gpu(x.astype(np.float32), args.gpu)
            R = cuda.to_gpu(R.astype(np.float32), args.gpu)

        mgc_gen = g.generate_mgc(Variable(x), R)

        # bring the result back to the host and undo the target normalisation
        mgc_gen = cuda.to_cpu(mgc_gen.data)
        mgc_gen = utils.normalize_gauss(mgc_gen,
                                        norm_tgt_mgc[0][:args.ndim_mgc],
                                        norm_tgt_mgc[1][:args.ndim_mgc],
                                        inv=True)

        # global variance post filtering
        # mgc_gen is [T', D]: converted features (denormalized)
        gm = np.mean(mgc_gen, axis=0)  # global mean [D, ]
        gv_gen = np.var(mgc_gen, axis=0)  # global variance [D, ]
        y = np.sqrt(
            (alpha * gv_nat +
             (1.0 - alpha) * gv_gen) / gv_gen) * (mgc_gen - gm) + gm

        # put the untouched 0th coefficient back in front of the emphasized
        # features
        mgc_gen = np.hstack((pow_src.reshape((-1, 1)), y))

        # save generated parameters
        # mgc -> save as double format
        # f0  -> save as ascii format
        # bap -> save as double format
        io.writematrix(mgc_gen,
                       os.path.join(args.outdir, 'mgc', fn_data + '.mgc'),
                       dfmt=True)
        io.writematrix(bap_src,
                       os.path.join(args.outdir, 'bap', fn_data + '.bap'),
                       dfmt=True)
        np.savetxt(os.path.join(args.outdir, 'f0', fn_data + '.f0'), f0_gen)
Example #2
0
    return util_f0.lf02f0(lf0)


def parse_args():
    """Parse the six positional command-line arguments.

    The arguments are, in order: ``lf0``, ``mean_src``, ``std_src``,
    ``mean_tgt``, ``std_tgt`` and ``out``. The two file names are kept as
    strings; the four statistics are converted to ``float``.

    Returns:
        The ``argparse.Namespace`` produced by the parser.
    """
    # (name, converter, help text) for each positional, in command-line order.
    positionals = (
        ('lf0', str, 'name of input lf0 file'),
        ('mean_src', float, 'mean lf0 of source speaker'),
        ('std_src', float, 'std lf0 of source speaker'),
        ('mean_tgt', float, 'mean lf0 of target speaker'),
        ('std_tgt', float, 'std lf0 of target speaker'),
        ('out', str, 'name of output f0 file'),
    )

    cli = argparse.ArgumentParser()
    for dest, converter, description in positionals:
        cli.add_argument(dest, type=converter, help=description)
    return cli.parse_args()


if __name__ == '__main__':
    # Script entry point: read an lf0 file, map it with f0map_linear using
    # the statistics given on the command line, and save the result as text.
    args = parse_args()

    # binary double format. if False, binary float
    dbl_fmt = False
    lf0 = io.readvector(args.lf0, dfmt=dbl_fmt)

    # Source statistics first, then target: the positional order in which
    # they are handed to f0map_linear.
    speaker_stats = (args.mean_src, args.std_src, args.mean_tgt, args.std_tgt)
    f0 = f0map_linear(lf0, *speaker_stats)

    np.savetxt(args.out, f0)
    for fn_data in open(train_scp).readlines():
        fn_data = fn_data.rstrip("\n")
        print(fn_data)
        # read data
        mgc_src = io.readmatrix(
            os.path.join('data', 'mgc', args.src, fn_data) + '.mgc',
            dim=args.dim + 1,
            dfmt=dbl_fmt)
        mgc_tgt = io.readmatrix(
            os.path.join('data', 'mgc', args.tgt, fn_data) + '.mgc',
            dim=args.dim + 1,
            dfmt=dbl_fmt)
        pow_src, pow_tgt = copy.copy(mgc_src[:, 0]), copy.copy(mgc_tgt[:, 0])

        lf0_src = io.readvector(
            os.path.join('data', 'lf0', args.src, fn_data) + '.lf0',
            dfmt=dbl_fmt)
        lf0_tgt = io.readvector(
            os.path.join('data', 'lf0', args.tgt, fn_data) + '.lf0',
            dfmt=dbl_fmt)
        lf0s_src = np.hstack((lf0s_src, lf0_src[lf0_src != HTS_U_SYMBOL]))
        lf0s_tgt = np.hstack((lf0s_tgt, lf0_tgt[lf0_tgt != HTS_U_SYMBOL]))

        # calculate delta
        SRC = apply_delta_win(mgc_src[:, 1:], wins)
        TGT = apply_delta_win(mgc_tgt[:, 1:], wins)

        # remove sil
        SRC = SRC[np.where(pow_src >= thval_src)]
        TGT = TGT[np.where(pow_tgt >= thval_tgt)]
Example #4
0
    if smooth is True:
        clf0 = rm_micro_prosody(clf0)

    return clf0, uv


# convert (clf0, uv) to lf0
def clf02lf0(clf0, uv):
    """Rebuild an lf0 sequence from continuous lf0 and a voicing flag.

    Args:
        clf0: continuous lf0 values, indexable by the positions where
            ``uv == 1``.
        uv: flags compared against 1; positions equal to 1 take their value
            from ``clf0``.

    Returns:
        A float array of ``len(clf0)`` elements holding ``clf0`` where
        ``uv == 1`` and ``HTS_U_SYMBOL`` everywhere else.
    """
    # Positions flagged with 1 are the only ones copied from clf0.
    voiced = np.where(uv == 1)

    # Start with every frame set to HTS_U_SYMBOL, then fill in the flagged
    # positions.
    lf0 = np.full(len(clf0), HTS_U_SYMBOL, dtype=np.float64)
    lf0[voiced] = clf0[voiced]
    return lf0


# convert lf0 to f0
def lf02f0(lf0):
    """Convert lf0 to f0.

    Args:
        lf0: array of lf0 values in which ``HTS_U_SYMBOL`` marks the entries
            that must not be exponentiated.

    Returns:
        An array of ``len(lf0)`` elements: ``exp(lf0)`` where ``lf0`` differs
        from ``HTS_U_SYMBOL`` and 0 elsewhere.
    """
    # Entries different from HTS_U_SYMBOL are the ones that get converted.
    voiced = lf0 != HTS_U_SYMBOL

    f0 = np.zeros(len(lf0))
    f0[voiced] = np.exp(lf0[voiced])
    return f0


if __name__ == "__main__":
    # Usage: <script> <input lf0> <output clf0> <output uv>
    name_lf0, name_clf0, name_uv = sys.argv[1], sys.argv[2], sys.argv[3]

    # HTS-format log F0 -> (continuous lf0, uv), with smoothing enabled.
    lf0 = io.readvector(name_lf0)
    clf0, uv = lf02clf0(lf0, smooth=True)

    # Write the continuous lf0 first, then the uv vector.
    for vector, path in ((clf0, name_clf0), (uv, name_uv)):
        io.writevector(vector, path)