Beispiel #1
0
def make_conversion(root,
                    result_dir,
                    checkpoint,
                    ut_min=91,
                    ut_max=100,
                    sp_min=91,
                    sp_max=100):
    """Run all-pairs voice conversion and write converted features and WAVs.

    For every target speaker ``jvs{s:03}`` with ``s`` in
    ``[sp_min, sp_max]`` a speaker dictionary is obtained (loaded from
    ``result_dir/<target>/sp_dict.pt`` when that file exists, otherwise built
    by ``extract_from`` from the largest ``BASIC5000_*.mcep.npy`` reference
    file of that speaker).  Then, for every source speaker in the same range
    and every utterance ``VOICEACTRESS100_{u:03}`` with ``u`` in
    ``[ut_min, ut_max]``, the source content is combined with the target
    speaker dictionary via ``model.reconstruct_mcep`` and the result is
    synthesised with WORLD.

    Outputs, per (target, source, utterance), under
    ``result_dir/<target>/<source>/``:

    * ``VOICEACTRESS100_{u:03}.mcep.npy`` -- the converted mel-cepstrum.
    * ``VOICEACTRESS100_{u:03}.wav`` -- 16-bit PCM audio written at 16000 Hz.

    Args:
        root: Dataset root containing ``<speaker>/nonpara30/wav24kHz16bit``
            and ``<speaker>/parallel100/wav24kHz16bit`` feature files.
        result_dir: Directory that receives caches and converted outputs.
        checkpoint: Path to a checkpoint loadable by ``torch.load`` holding
            ``'config'`` (with a ``'model'`` entry) and ``'model'`` weights.
        ut_min: First utterance index (inclusive).
        ut_max: Last utterance index (inclusive).
        sp_min: First speaker index (inclusive).
        sp_max: Last speaker index (inclusive).

    Raises:
        ValueError: If a target speaker has no ``BASIC5000_*.mcep.npy``
            reference file and no cached speaker dictionary.
        AssertionError: If ``c0`` has more frames than the converted
            mel-cepstrum.
    """
    alpha = 0.42  # all-pass constant handed to pysptk.mc2sp
    n_fft = 1024  # FFT length handed to pysptk.mc2sp
    # NOTE(review): inputs live under 'wav24kHz16bit' but synthesis runs at
    # 16000 Hz -- presumably the features were extracted from resampled
    # audio; confirm against the feature-extraction step.
    fs = 16000

    root = Path(root)
    result_dir = Path(result_dir)
    dicts = torch.load(checkpoint, map_location='cpu')
    model = AE(dicts['config']['model'], train=False)
    model.load_state_dict(dicts['model'])
    model = model.eval()

    for s in range(sp_min, sp_max + 1):
        target = f'jvs{s:03}'
        sp_root = result_dir / target
        sp_root.mkdir(parents=True, exist_ok=True)
        sp_dict_path = sp_root / 'sp_dict.pt'

        if sp_dict_path.is_file():
            # Load onto the CPU, matching how the model itself was loaded.
            sp_dict = torch.load(sp_dict_path, map_location='cpu')
        else:
            ref_dir = root / target / 'nonpara30/wav24kHz16bit'
            nonparas = list(ref_dir.glob('BASIC5000_*.mcep.npy'))
            if not nonparas:
                raise ValueError(
                    f'no BASIC5000_*.mcep.npy reference files in {ref_dir}')
            # Use the largest file on disk as the speaker reference.
            ref_mcep = max(nonparas, key=lambda p: p.stat().st_size)
            # 'X.mcep.npy' -> stem 'X.mcep' -> 'X.f0.npy'
            ref_f0 = ref_mcep.parent / ref_mcep.stem.replace(
                '.mcep', '.f0.npy')
            # presumably extract_from also writes sp_dict_path as a cache --
            # verify against its definition
            sp_dict = extract_from(model, ref_mcep, ref_f0, sp_dict_path)

        for s2 in range(sp_min, sp_max + 1):
            source = f'jvs{s2:03}'
            sp2_root = result_dir / source
            sp2_root.mkdir(parents=True, exist_ok=True)
            src_dir = root / source / 'parallel100/wav24kHz16bit'

            target_root = sp_root / source
            target_root.mkdir(parents=True, exist_ok=True)

            for u in range(ut_min, ut_max + 1):
                stem = f'VOICEACTRESS100_{u:03}'
                src_mcep = src_dir / f'{stem}.mcep.npy'
                src_f0 = src_dir / f'{stem}.f0.npy'
                src_c0 = src_dir / f'{stem}.c0.npy'
                src_ap = src_dir / f'{stem}.ap.npy'

                # Content features are cached per source utterance so they
                # are shared across all target speakers.
                src_dict_path = sp2_root / f'{stem}.pt'
                if src_dict_path.is_file():
                    src_dict = torch.load(src_dict_path, map_location='cpu')
                else:
                    src_dict = prep_content(model, src_mcep, src_dict_path)

                converted_mcep = model.reconstruct_mcep(
                    src_dict['c'], src_dict['q'], sp_dict['k'],
                    sp_dict['v']).squeeze().numpy()
                np.save(target_root / f'{stem}.mcep.npy', converted_mcep)

                f0 = convert_f0(np.load(src_f0).astype(np.float64), sp_dict)
                ap = reconstruct_ap(np.load(src_ap).astype(np.float64))
                c0 = np.load(src_c0).astype(np.float64)
                assert (
                    c0.shape[0] <= converted_mcep.shape[-1]
                ), f'{s}->{s2}/{u}, {c0.shape[0]} <= {converted_mcep.shape[-1]}'

                # Prepend c0 as the first column and trim the converted
                # mel-cepstrum to c0's frame count.
                mcep = np.hstack(
                    [c0[:, None],
                     converted_mcep[:, :c0.shape[0]].T]).astype(np.float64)
                spectrum = pysptk.mc2sp(
                    np.ascontiguousarray(mcep), alpha, n_fft)
                wav = pyworld.synthesize(f0, spectrum, ap, fs)

                # Clip before the cast: without it, samples at or beyond
                # full scale wrap around in int16 and produce loud clicks.
                pcm = np.clip(wav * 32768, -32768, 32767).astype(np.int16)
                tgt_wav = target_root / f'{stem}.wav'
                wavfile.write(tgt_wav, fs, pcm)
                print(tgt_wav, flush=True)
Beispiel #2
0
def invert(pred):
    """Return ``|1 - pred|``, which swaps the binary labels 0 and 1."""
    flipped = 1 - pred
    return np.absolute(flipped)


def save_prediction(pred, out_csv='prediction.csv'):
    """Write predictions to a two-column CSV and report the output path.

    The file starts with the header ``id,label``; each following row holds
    the zero-based position of an item in ``pred`` and the item itself.

    Args:
        pred: Iterable of predicted labels.
        out_csv: Destination file path; overwritten if it exists.
    """
    with open(out_csv, 'w') as handle:
        handle.write('id,label\n')
        handle.writelines(
            f'{idx},{label}\n' for idx, label in enumerate(pred))
    print(f'Save prediction to {out_csv}.')


# Load the trained autoencoder onto the GPU and switch it to eval mode.
# NOTE(review): model_path, npy_path and out are not defined in this
# excerpt -- presumably set earlier in the script; verify.
model = AE().cuda()
model.load_state_dict(torch.load(model_path))
model.eval()

# Prepare the data
trainX = np.load(npy_path)

# Predict the answers
latents = inference(X=trainX, model=model)
pred, X_embedded = predict(latents)

# Save the predictions to a file for upload to Kaggle
#save_prediction(pred, 'prediction.csv')

# Since this is an unsupervised binary classification problem, we only care
# whether the images were successfully split into two groups.
# If the file above scores below 0.5 accuracy on Kaggle, simply flip the labels.
save_prediction(invert(pred), out)