def test_DCA_short(noise_dataset):
    """Test that a DCA model raises an error when T would break chunking."""
    data = noise_dataset

    # Fitting without chunked covariance estimation should succeed.
    DCA(d=3, T=20).fit(data)

    # Too many chunks for this T must raise a ValueError.
    with pytest.raises(ValueError):
        DCA(d=3, T=20, chunk_cov_estimate=10).fit(data)

    # A small number of chunks is still compatible with T.
    DCA(d=3, T=20, chunk_cov_estimate=2).fit(data)
def test_DCA_variable_d(noise_dataset):
    """Test that the DCA projection can be refit with different d."""
    data = noise_dataset
    model = DCA(d=3, T=10)
    model.estimate_data_statistics(data)

    # First projection uses the d given at construction time.
    model.fit_projection()
    assert model.d_fit == 3
    assert model.coef_.shape[1] == 3

    # Refit with a smaller d without re-estimating the statistics.
    model.fit_projection(d=2)
    assert model.d_fit == 2
    assert model.coef_.shape[1] == 2
def test_init(noise_dataset):
    """Smoke-test fitting with each supported projection initialization."""
    data = noise_dataset
    for scheme in ('random', 'uniform'):
        DCA(d=3, T=10, init=scheme).fit(data)
def test_DCA_variable_T(noise_dataset):
    """Test that the DCA projection can be refit with a different T."""
    data = noise_dataset
    model = DCA(d=3, T=10)
    model.estimate_data_statistics(data)

    # Two fits with the same seed and the default T must agree exactly.
    model.rng = np.random.RandomState(0)
    model.fit_projection()
    assert model.T_fit == 10
    coefs_first = model.coef_.copy()

    model.rng = np.random.RandomState(0)
    model.fit_projection()
    coefs_second = model.coef_.copy()

    # Same seed but a smaller T should yield a different projection.
    model.rng = np.random.RandomState(0)
    model.fit_projection(T=5)
    assert model.T_fit == 5
    coefs_small_T = model.coef_.copy()

    assert_allclose(coefs_first, coefs_second)
    assert not np.allclose(coefs_first, coefs_small_T)

    # T larger than what the statistics were estimated with is invalid.
    with pytest.raises(ValueError):
        model.fit_projection(T=11)
def test_DCA(noise_dataset):
    """Test that a DCA model can be fit with no errors."""
    data = noise_dataset

    # Basic fit / transform / score round-trip; the stored mean must match.
    model = DCA(d=3, T=10)
    model.fit(data)
    assert_allclose(data.mean(axis=0, keepdims=True), model.mean_)
    model.transform(data)
    model.fit_transform(data)
    model.score()

    # Multiple restarts, scoring with stored statistics and with new data.
    model = DCA(d=3, T=10, n_init=2)
    model.fit(data)
    model.score()
    model.score(data)

    # Verbose fitting.
    DCA(d=3, T=10, verbose=True).fit(data)

    # Non-Toeplitz covariance estimation.
    DCA(d=3, T=10, block_toeplitz=False).fit(data)
def test_stride_DCA(lorenz_dataset):
    """Check that deterministic and random strides work for DCA."""
    data = lorenz_dataset

    # Reference: full (stride-1) covariance estimate.
    model = DCA(T=1)
    model.estimate_data_statistics(data)
    covs_full = model.cross_covs.numpy()

    # A deterministic stride perturbs the estimate only slightly.
    model = DCA(T=1, stride=2)
    model.estimate_data_statistics(data)
    covs_stride2 = model.cross_covs.numpy()
    assert not np.allclose(covs_full, covs_stride2)
    assert_allclose(covs_full, covs_stride2, atol=5e-2)

    # A random (fractional) stride also stays close to the full estimate.
    model = DCA(T=1, stride=.5, rng_or_seed=0)
    model.estimate_data_statistics(data)
    covs_seed0 = model.cross_covs.numpy()
    assert not np.allclose(covs_full, covs_seed0)
    assert_allclose(covs_full, covs_seed0, atol=5e-2)

    # A different seed gives a different, but similar, estimate.
    model = DCA(T=1, stride=.5, rng_or_seed=1)
    model.estimate_data_statistics(data)
    covs_seed1 = model.cross_covs.numpy()
    assert not np.allclose(covs_seed1, covs_seed0)
    assert_allclose(covs_seed1, covs_seed0, atol=5e-2)

    # The same seed reproduces the estimate exactly.
    model = DCA(T=1, stride=.5, rng_or_seed=1)
    model.estimate_data_statistics(data)
    covs_seed1_again = model.cross_covs.numpy()
    assert_allclose(covs_seed1, covs_seed1_again)
def test_input_type():
    """Test that a list of 2d arrays or a 3d array work."""
    model = DCA(d=3, T=10)

    # List of 2d (time x features) arrays; mean is taken over all trials.
    trials = [np.random.randn(1000, 10) for _ in range(3)]
    model.fit(trials)
    assert_allclose(np.concatenate(trials).mean(axis=0, keepdims=True), model.mean_)
    model.transform(trials)
    model.fit_transform(trials)

    # Single 3d (trials x time x features) array.
    stacked = np.random.randn(3, 1000, 10)
    model.fit(stacked)
    model.transform(stacked)
    model.fit_transform(stacked)
def test_input_type():
    """Test that a list of 2d arrays or a 3d array work."""
    # NOTE(review): this redefines test_input_type above; at import time only
    # this version is bound, so pytest collects just this one — confirm the
    # duplication is intentional.
    model = DCA(d=3, T=10)

    # List of 2d (time x features) arrays.
    list_input = [np.random.randn(1000, 10) for _ in range(3)]
    model.fit(list_input)
    model.transform(list_input)
    model.fit_transform(list_input)

    # Single 3d (trials x time x features) array.
    array_input = np.random.randn(3, 1000, 10)
    model.fit(array_input)
    model.transform(array_input)
    model.fit_transform(array_input)
def test_DCA(noise_dataset):
    """Test that a DCA model can be fit with no errors."""
    # NOTE(review): this redefines test_DCA above; at import time only this
    # version is bound, so pytest collects just this one — confirm the
    # duplication is intentional.
    data = noise_dataset

    # Basic fit / transform / score round-trip.
    model = DCA(d=3, T=10)
    model.fit(data)
    model.transform(data)
    model.fit_transform(data)
    model.score()

    # Multiple restarts, scoring with stored statistics and with new data.
    model = DCA(d=3, T=10, n_init=2)
    model.fit(data)
    model.score()
    model.score(data)

    # Alternate optimizer path (use_scipy=False).
    DCA(d=3, T=10, use_scipy=False).fit(data)

    # Verbose fitting.
    DCA(d=3, T=10, verbose=True).fit(data)

    # Non-Toeplitz covariance estimation.
    DCA(d=3, T=10, block_toeplitz=False).fit(data)
# Example #10
def main(args):
    """Compare linear DCA against a DAPC model on noisy, lifted Lorenz data.

    Generates ground-truth Lorenz dynamics, nonlinearly lifts and noisifies
    them at several signal-to-noise ratios, fits a linear DCA model and a
    DAPC model to each noisy dataset, matches both sets of latents back to
    the ground-truth dynamics, and saves R^2 scores and comparison plots.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments, forwarded to the argument parser.
    """
    parser = get_parser()
    parser = DAPC.add_arguments(parser)
    args = parser.parse_args(args)

    # Seed numpy and torch so data generation and training are reproducible.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Handle multiple gpu issues.

    T = args.T
    fdim = args.fdim
    encoder_name = args.encoder_type
    # NOTE(review): params is empty, so every snapshot/npy/plot path below
    # uses an empty run identifier (e.g. "plots/") — confirm this is intended.
    params = ''
    print(params)

    idim = 30 # lift projection dim
    noise_dim = 7 # noisify raw DCA (NOTE(review): unused in this function)
    split_rate = args.split_rate # train/valid split
    snr_vals = [0.3, 1.0, 5.0]  # signal-to-noise ratios
    num_samples = 10000  # samples to collect from the lorenz system

    print("Generating ground truth dynamics ...")
    X_dynamics = gen_lorenz_data(num_samples)  # 10000 * 3

    noisy_model = DNN(X_dynamics.shape[1], idim, dropout=0.5)  # DNN lift projection: 3 -> 30 for d-DCA
    use_gpu = False
    if use_gpu:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    # Reconstructions and R^2 scores collected per SNR level.
    dca_recons = []
    dapc_recons = []
    r2_vals = np.zeros((len(snr_vals), 2))  # obtain R2 scores for DCA and dDCA
    for snr_idx, snr in enumerate(snr_vals):
        print("Generating noisy data with snr=%.2f ..." % snr)
        X_clean, X_noisy = gen_nonlinear_noisy_lorenz(idim, T, snr, X_dynamics=X_dynamics, noisy_model=noisy_model, seed=args.seed)
        # Center the noisy observations.
        X_noisy = X_noisy - X_noisy.mean(axis=0)

        # Split every signal with the same train/valid ratio so rows stay aligned.
        X_clean_train, X_clean_val = split(X_clean, split_rate)
        X_noisy_train, X_noisy_val = split(X_noisy, split_rate)
        X_dyn_train, X_dyn_val = split(X_dynamics, split_rate)
        if not os.path.exists("runs"):
            os.mkdir("runs")

        # Chunk the long sequences for minibatch training; presumably the
        # second argument (30) is a minimum/overlap length — TODO confirm
        # against chunk_long_seq.
        chunk_size = 500
        X_train_seqs, L_train = chunk_long_seq(X_noisy_train, 30, chunk_size)
        X_valid_seqs, L_valid = chunk_long_seq(X_noisy_val, 30, chunk_size)
        X_clean_seqs, L_clean = chunk_long_seq(X_clean_val, 30, chunk_size)
        X_dyn_seqs, L_dyn = chunk_long_seq(X_dyn_val, 30, chunk_size)
        print(X_train_seqs[0].shape)

        # 0:500 test, 1000:1500 valid
        X_match = torch.from_numpy(_context_concat(X_noisy_val[1000:1500], 0)).float().to(device)
        Y_match = X_dyn_val[1000:1500]
        # Linear DCA
        print("Training {}".format(args.base_encoder_type))

        dca_model = DCA(d=fdim, T=T)
        dca_model.fit(X_train_seqs + X_valid_seqs[:1])
        X_dca = dca_model.transform(X_noisy_val[:500])
        # Reduce to 3 dims for visualization/matching when fdim > 3.
        if X_dca.shape[1] > 3:
            X_dca = TSNE(n_components=3).fit_transform(X_dca)

        # deep DCA
        print("Training {}".format(encoder_name))
        dapc_model = DAPC(args.obj, idim, fdim, T, encoder_type=args.encoder_type,
                                                 ortho_lambda=args.ortho_lambda, recon_lambda=args.recon_lambda,
                                                 dropout=args.dropout, masked_recon=args.masked_recon,
                                                 args=args, device=device)

        dapc_model = fit_dapc(dapc_model, X_train_seqs, L_train, X_valid_seqs, L_valid, None, args.lr, use_gpu,
                batch_size=args.batchsize, max_epochs=args.epochs, device=device, snapshot=params + ".cpt", X_match=X_match, Y_match=Y_match, use_writer=False)

        # Encode the held-out window with the trained DAPC encoder.
        X_dapc = dapc_model.encode(
            torch.from_numpy(_context_concat(X_noisy_val[:500], dapc_model.input_context)).float().to(device,
                                                            dtype=dapc_model.dtype)).cpu().numpy()
        if X_dapc.shape[1] > 3:
            X_dapc = TSNE(n_components=3).fit_transform(X_dapc)

        # Print the empirical covariance of the (centered) DAPC latents.
        print(np.matmul((X_dapc - X_dapc.mean(0)).T, (X_dapc - X_dapc.mean(0))) / X_dapc.shape[0])

        if not os.path.exists("pngs"):
            os.mkdir("pngs")

        # match DCA with ground-truth
        if not os.path.exists("npys"):
            os.mkdir("npys")
        np.save("npys/dapc_bases_{}.npy".format(params), X_dapc)
        print("Matching {}".format(args.base_encoder_type))
        X_dca_recon, _ = match(X_dca, X_dyn_val[:500], 15000, device)
        # match DAPC with ground-truth
        print("Matching {}".format(encoder_name))
        X_dapc_recon, _ = match(X_dapc, X_dyn_val[:500], 15000, device)

        # R2 of dca: 1 - SSE / total variance of the ground-truth window.
        r2_dca = 1 - np.sum((X_dca_recon - X_dyn_val[:500]) ** 2) / np.sum(
                (X_dyn_val[:500] - np.mean(X_dyn_val[:500], axis=0)) ** 2)
        print("\nr2_dca:", r2_dca)
        # R2 of dapc
        r2_dapc = 1 - np.sum((X_dapc_recon - X_dyn_val[:500]) ** 2) / np.sum(
                (X_dyn_val[:500] - np.mean(X_dyn_val[:500], axis=0)) ** 2)
        print("r2_dapc:", r2_dapc)
        # store R2's
        r2_vals[snr_idx] = [r2_dca, r2_dapc]
        # store reconstructed signals
        dca_recons.append(X_dca_recon)
        dapc_recons.append(X_dapc_recon)

    if not os.path.exists("plots"):
        os.mkdir("plots")
    if not os.path.exists("plots/{}".format(params)):
        os.mkdir("plots/{}".format(params))

    # Render the side-by-side reconstruction figure for all SNR levels.
    plot_figs(dca_recons, dapc_recons, X_dyn_val[:500], X_clean_val[:500], X_noisy_val[:500], r2_vals, snr_vals, args.base_encoder_type,
              encoder_name, "plots/{}".format(params))