def _read_mean_var(stats, speaker, feature):
    """Return the ``(mean, var)`` pair stored under ``stats[speaker][feature]``.

    ``stats`` is an open h5py file (or group) containing ``mean`` and ``var``
    datasets below ``speaker/feature``.
    """
    group = stats[speaker][feature]
    # ``Dataset.value`` was removed in h5py 3.0; indexing with ``[()]`` reads
    # the whole dataset on both old and current h5py releases.
    return group['mean'][()], group['var'][()]


def _gv_postfilter(mgc, gv_nat, alpha=1.0):
    """Apply global-variance post-filtering to ``mgc`` column by column.

    Each column is rescaled around its own mean so that its variance moves
    from the generated variance toward ``gv_nat``.

    Args:
        mgc: 2-D array of generated (denormalized) features, frames on axis 0.
        gv_nat: per-dimension target variance.
        alpha: emphasis coefficient (0 < alpha <= 1.0); 1.0 uses ``gv_nat``
            only.

    Returns:
        The emphasized features, same shape as ``mgc``.
    """
    gm = np.mean(mgc, axis=0)  # global mean per dimension
    gv_gen = np.var(mgc, axis=0)  # global variance per dimension
    with np.errstate(divide='ignore', invalid='ignore'):
        scale = np.sqrt((alpha * gv_nat + (1.0 - alpha) * gv_gen) / gv_gen)
    # A dimension with zero generated variance (e.g. a one-frame utterance)
    # would otherwise become inf * 0 = NaN; leave such dimensions unscaled.
    scale = np.where(gv_gen > 0, scale, 1.0)
    return scale * (mgc - gm) + gm


def main():
    """Convert every utterance listed in ``test.scp`` and save the results.

    Reads the command-line options via ``parse_args()`` (uses ``prjdir``,
    ``outdir``, ``src``, ``ndim_mgc``, ``ndim_bap``, ``nhid`` and ``gpu``),
    loads normalization statistics from ``<prjdir>/data/stats.h5`` and the
    generator weights from ``<outdir>/g.model``, then for each utterance:

    * mgc: converted by the generator (the 0th coefficient is copied from the
      source), denormalized and GV post-filtered,
    * lf0: mapped with ``f0map_linear`` using source/target mean and std,
    * bap: copied unchanged.

    Outputs are written below ``<outdir>/mgc``, ``<outdir>/f0`` and
    ``<outdir>/bap``.
    """
    args = parse_args()
    data_dir = os.path.join(args.prjdir, 'data')

    logging.basicConfig(format="[%(asctime)s] %(message)s",
                        level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    dbl_fmt = False  # input files are read with dfmt=False
    for d in ('mgc', 'f0', 'bap'):
        utils.mkdir(os.path.join(args.outdir, d))

    # statistics; the context manager closes the file even if a read fails
    with h5py.File(os.path.join(data_dir, 'stats.h5'), 'r') as stats:
        norm_src_mgc = _read_mean_var(stats, 'src', 'mgc')
        norm_tgt_mgc = _read_mean_var(stats, 'tgt', 'mgc')
        norm_src_lf0 = _read_mean_var(stats, 'src', 'lf0')
        norm_tgt_lf0 = _read_mean_var(stats, 'tgt', 'lf0')

    # list of test data
    test_scp = os.path.join(data_dir, 'scp', 'test.scp')

    # window for delta
    wins = [[1.0], [-0.5, 0.0, 0.5]]

    # model definition
    g = Generator(args.ndim_mgc * 2, args.nhid, args.ndim_mgc * 2)
    logger.info('loading pre-trained generator model from %s', args.outdir)
    serializers.load_hdf5(os.path.join(args.outdir, 'g.model'), g)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        g.to_gpu()

    # Read the utterance list up front so the handle is closed before the
    # (long) conversion loop; blank lines are ignored.
    with open(test_scp) as scp:
        utt_ids = [line.rstrip("\n") for line in scp if line.strip()]

    for fn_data in utt_ids:
        print(fn_data)

        # read data
        # mgc -> converted by using trained DNNs
        mgc_src = io.readmatrix(
            os.path.join(data_dir, 'mgc', args.src, fn_data) + '.mgc',
            dim=args.ndim_mgc + 1, dfmt=dbl_fmt)
        pow_src = copy.copy(mgc_src[:, 0])  # 0th mgc is not converted

        # lf0 -> linearly converted by using statistics of src & tgt lf0
        lf0_src = io.readvector(
            os.path.join(data_dir, 'lf0', args.src, fn_data) + '.lf0',
            dfmt=dbl_fmt)
        f0_gen = f0map_linear(lf0_src,
                              norm_src_lf0[0], np.sqrt(norm_src_lf0[1]),
                              norm_tgt_lf0[0], np.sqrt(norm_tgt_lf0[1]))

        # bap -> not converted
        bap_src = io.readmatrix(
            os.path.join(data_dir, 'bap', args.src, fn_data) + '.bap',
            dim=args.ndim_bap, dfmt=dbl_fmt)

        # predict mgc
        x = apply_delta_win(mgc_src[:, 1:], wins)
        R, _ = mlpg_test.mlpg_from_pdf(x, wins)
        # Cast to float32 on both paths so the CPU run uses the same dtypes
        # as the GPU run (the original only cast R when moving it to the GPU).
        R = R.toarray().astype(np.float32)
        x = utils.normalize_gauss(
            x, norm_src_mgc[0], norm_src_mgc[1]).astype(np.float32)
        if args.gpu >= 0:
            x = cuda.to_gpu(x, args.gpu)
            R = cuda.to_gpu(R, args.gpu)

        mgc_gen = g.generate_mgc(Variable(x), R)
        mgc_gen = cuda.to_cpu(mgc_gen.data)
        mgc_gen = utils.normalize_gauss(mgc_gen,
                                        norm_tgt_mgc[0][:args.ndim_mgc],
                                        norm_tgt_mgc[1][:args.ndim_mgc],
                                        inv=True)

        # global variance post filtering
        gv_nat = norm_tgt_mgc[1][:args.ndim_mgc]
        y = _gv_postfilter(mgc_gen, gv_nat, alpha=1.0)
        mgc_gen = np.hstack((pow_src.reshape((-1, 1)), y))

        # save generated parameters
        # mgc -> save as double format
        # f0 -> save as ascii format
        # bap -> save as double format
        io.writematrix(mgc_gen,
                       os.path.join(args.outdir, 'mgc', fn_data + '.mgc'),
                       dfmt=True)
        io.writematrix(bap_src,
                       os.path.join(args.outdir, 'bap', fn_data + '.bap'),
                       dfmt=True)
        np.savetxt(os.path.join(args.outdir, 'f0', fn_data + '.f0'), f0_gen)
return util_f0.lf02f0(lf0) def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('lf0', type=str, help='name of input lf0 file') parser.add_argument('mean_src', type=float, help='mean lf0 of source speaker') parser.add_argument('std_src', type=float, help='std lf0 of source speaker') parser.add_argument('mean_tgt', type=float, help='mean lf0 of target speaker') parser.add_argument('std_tgt', type=float, help='std lf0 of target speaker') parser.add_argument('out', type=str, help='name of output f0 file') return parser.parse_args() if __name__ == '__main__': args = parse_args() dbl_fmt = False # binary double format. if False, binary float lf0 = io.readvector(args.lf0, dfmt=dbl_fmt) f0 = f0map_linear(lf0, args.mean_src, args.std_src, args.mean_tgt, args.std_tgt) np.savetxt(args.out, f0)
for fn_data in open(train_scp).readlines(): fn_data = fn_data.rstrip("\n") print(fn_data) # read data mgc_src = io.readmatrix( os.path.join('data', 'mgc', args.src, fn_data) + '.mgc', dim=args.dim + 1, dfmt=dbl_fmt) mgc_tgt = io.readmatrix( os.path.join('data', 'mgc', args.tgt, fn_data) + '.mgc', dim=args.dim + 1, dfmt=dbl_fmt) pow_src, pow_tgt = copy.copy(mgc_src[:, 0]), copy.copy(mgc_tgt[:, 0]) lf0_src = io.readvector( os.path.join('data', 'lf0', args.src, fn_data) + '.lf0', dfmt=dbl_fmt) lf0_tgt = io.readvector( os.path.join('data', 'lf0', args.tgt, fn_data) + '.lf0', dfmt=dbl_fmt) lf0s_src = np.hstack((lf0s_src, lf0_src[lf0_src != HTS_U_SYMBOL])) lf0s_tgt = np.hstack((lf0s_tgt, lf0_tgt[lf0_tgt != HTS_U_SYMBOL])) # calculate delta SRC = apply_delta_win(mgc_src[:, 1:], wins) TGT = apply_delta_win(mgc_tgt[:, 1:], wins) # remove sil SRC = SRC[np.where(pow_src >= thval_src)] TGT = TGT[np.where(pow_tgt >= thval_tgt)]
if smooth is True: clf0 = rm_micro_prosody(clf0) return clf0, uv # convert (clf0, uv) to f0 def clf02lf0(clf0, uv): lf0 = np.ones(len(clf0)) * HTS_U_SYMBOL lf0[np.where(uv == 1)] = clf0[np.where(uv == 1)] return lf0 # convert lf0 to f0 def lf02f0(lf0): T = len(lf0) f0 = np.zeros(T) f0[lf0 != HTS_U_SYMBOL] = np.exp(lf0[lf0 != HTS_U_SYMBOL]) return f0 if __name__ == "__main__": name_lf0 = sys.argv[1] name_clf0 = sys.argv[2] name_uv = sys.argv[3] lf0 = io.readvector(name_lf0) # HTS-format log F0 clf0, uv = lf02clf0(lf0, smooth=True) # lf0 -> continuous lf0 io.writevector(clf0, name_clf0) io.writevector(uv, name_uv)