Esempio n. 1
0
def calculate(cv_path, gt_file_list, conf, spkr_conf):
    """Compute the mel-cepstral distortion (MCD) of one converted utterance.

    The stem of ``cv_path`` must consist of exactly three "_"-separated
    fields: the utterance number, a source-speaker field and a
    target-speaker field.  For the two speaker fields only the part after
    the last "-" is kept.

    Args:
        cv_path: Path-like object (needs ``.stem``) of the converted file.
        gt_file_list: Mapping from ``"<tarspk>_<number>"`` to the ground
            truth feature file of that utterance.
        conf: Configuration mapping; ``conf["feat_type"]`` selects how the
            converted features are obtained.
        spkr_conf: Speaker configuration, forwarded unchanged to
            ``get_world_features``.

    Returns:
        A tuple ``("<orgspk>-<tarspk>-<number>", mcd)``.

    Raises:
        ValueError: If the file stem does not split into three fields.
        KeyError: If the ground truth entry is missing in ``gt_file_list``.
    """
    number, src_field, tgt_field = cv_path.stem.split("_")
    orgspk = src_field.split("-")[-1]
    tarspk = tgt_field.split("-")[-1]

    # Converted features: analysed from the waveform unless the converted
    # file already stores mcep features (then read them from the file).
    if conf["feat_type"] != "mcep":
        cv_mcep, cv_f0 = get_world_features(cv_path, tarspk, conf, spkr_conf)
    else:
        cv_mcep = read_feature(cv_path, "feat")
        cv_f0 = read_feature(cv_path, "f0")

    # Ground truth features of the same utterance spoken by the target.
    gt_path = gt_file_list[f"{tarspk}_{number}"]
    gt_mcep = read_feature(gt_path, "mcep")
    gt_f0 = read_feature(gt_path, "f0")

    # Keep only frames whose f0 is positive (the non-silence parts).
    cv_voiced = cv_mcep[np.nonzero(cv_f0 > 0)[0]]
    gt_voiced = gt_mcep[np.nonzero(gt_f0 > 0)[0]]

    # Align both sequences with DTW; each path entry is a (cv, gt) index pair.
    _, warp_path = fastdtw(cv_voiced,
                           gt_voiced,
                           dist=scipy.spatial.distance.euclidean)
    alignment = np.asarray(warp_path)
    cv_aligned = cv_voiced[alignment[:, 0]]
    gt_aligned = gt_voiced[alignment[:, 1]]

    # MCD: per-frame distortion averaged over all aligned frames.
    sq_dist = np.square(cv_aligned - gt_aligned).sum(axis=1)
    per_frame = 10.0 / np.log(10.0) * np.sqrt(2 * sq_dist)
    mcd = per_frame.mean(axis=0)

    label = "-".join((orgspk, tarspk, number))
    return label, mcd
Esempio n. 2
0
def main():
    """Synthesize waveforms from filter bank h5 files with Griffin-Lim.

    Command line arguments:
        --conf: configuration file, loaded with ``load_yaml``.
        --rootdir: directory searched (non-recursively) for ``*.h5`` files.
        --outdir: directory used to build the output ``.wav`` paths.

    Every ``<rootdir>/<name>.h5`` is read via ``read_feature(..., "feats")``
    and handed to ``mlfb2wavf`` together with the target path
    ``<outdir>/<name>.wav``.  The conversions are dispatched through joblib.
    """
    parser = argparse.ArgumentParser(
        description=
        "Convert filter banks to waveform using Griffin-Lim algorithm",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--conf",
                        type=str,
                        required=True,
                        help="Cofiguration file")
    parser.add_argument(
        "--rootdir",
        type=str,
        required=True,
        help="Root directory of filter bank h5 files",
    )
    parser.add_argument("--outdir",
                        type=str,
                        required=True,
                        help="Output directory")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("%s: %s", k, v)

    # find h5 files
    outdir = Path(args.outdir)
    feats_files = sorted(Path(args.rootdir).glob("*.h5"))
    # Build the file name first and join it afterwards: "/" binds tighter
    # than "+", so `outdir / stem + ".wav"` would evaluate `Path + str` and
    # raise TypeError.
    feats = {
        outdir / f"{filename.stem}.wav": read_feature(filename, "feats")
        for filename in feats_files
    }

    # Main Griffin-Lim algorithm
    feat_conf = conf["feature"]
    Parallel(n_jobs=30)([
        delayed(mlfb2wavf)(
            feat,
            wavf,
            fs=feat_conf["fs"],
            n_mels=feat_conf["mlfb_dim"],
            fftl=feat_conf["fftl"],
            hop_size=feat_conf["hop_size"],
            plot=False,
        ) for wavf, feat in feats.items()
    ])