Example #1
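These snippets appear to be excerpts from the epi package's test suite and are shown without their import preamble. A plausible set of imports, with module paths assumed from usage rather than confirmed by the source, would be:

# Assumed imports for the snippets below (module paths are guesses based on
# usage; helpers such as DTYPE, RANK, plot_nf, and linear2D_freq are defined
# elsewhere in the suite).
import numpy as np
import scipy.stats
import tensorflow as tf
import tensorflow_probability as tfp
from pytest import raises

import epi.batch_norm
from epi.models import Distribution, Model, Parameter
from epi.normalizing_flows import NormalizingFlow
from epi.util import AugLagHPs, aug_lag_vars, unbiased_aug_grad
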
def test_Distribution():
    """ Test Distribution class."""
    tf.random.set_seed(1)
    np.random.seed(1)
    Ds = [2, 4]
    num_dists = 1
    N1 = 1000
    N2 = 10
    for D in Ds:
        df = 2 * D
        inv_wishart = scipy.stats.invwishart(df=df, scale=df * np.eye(D))
        for i in range(num_dists):
            nf = NormalizingFlow(
                "autoregressive",
                D,
                1,
                2,
                max(10, D),
                batch_norm=False,
                post_affine=True,
            )
            mu = np.random.normal(0.0, 1.0, (D, 1))
            Sigma = inv_wishart.rvs(1)
            mvn = scipy.stats.multivariate_normal(mu[:, 0], Sigma)
            init_type = "gaussian"
            init_params = {"mu": mu, "Sigma": Sigma}
            opt_df = nf.initialize(
                init_type, init_params, num_iters=2500, load_if_cached=False, save=False
            )
            q_theta = Distribution(nf)

            z = q_theta.sample(N1)
            assert np.isclose(np.mean(z, axis=0), mu[:, 0], rtol=0.1).all()
            cov = np.cov(z.T)
            assert np.sum(np.square(cov - Sigma)) / np.sum(np.square(Sigma)) < 0.1

            z = q_theta.sample(N2)
            assert np.isclose(mvn.logpdf(z), q_theta.log_prob(z), rtol=0.1).all()

            Sigma_inv = np.linalg.inv(Sigma)

            # Test gradient
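            # For a Gaussian, grad_z log q(z) = Sigma^{-1} (mu - z).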
            grad_true = np.dot(Sigma_inv, mu - z.T).T
            grad_z = q_theta.gradient(z)
            assert (
                np.sum(np.square(grad_true - grad_z)) / np.sum(np.square(grad_true))
                < 0.1
            )

            # Test hessian
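            # For a Gaussian, the Hessian of log q(z) is -Sigma^{-1} at every z.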
            hess_true = np.array(N2 * [-Sigma_inv])
            hess_z = q_theta.hessian(z)
            assert (
                np.sum(np.square(hess_true - hess_z)) / np.sum(np.square(hess_true))
                < 0.1
            )
            with raises(TypeError):
                hess_z = q_theta.hessian("foo")
Example #2
def test_unbiased_aug_grad():
    # Test using linear 2D system eps
    N = 100
    z = np.random.normal(0.0, 1.0, (N, 4)).astype(DTYPE)
    log_q_z = np.random.normal(2.0, 3.0, (N, )).astype(DTYPE)
    mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi]).astype(DTYPE)

    lb = -np.inf
    ub = np.inf
    a11 = Parameter("a11", 1, lb, ub)
    a12 = Parameter("a12", 1, lb, ub)
    a21 = Parameter("a21", 1, lb, ub)
    a22 = Parameter("a22", 1, lb, ub)
    params = [a11, a12, a21, a22]
    M = Model("lds", params)
    M.set_eps(linear2D_freq)

    nf = NormalizingFlow(arch_type="autoregressive",
                         D=4,
                         num_stages=1,
                         num_layers=2,
                         num_units=15)

    with tf.GradientTape(persistent=True) as tape:
        z, log_q_z = nf(N)
        params = nf.trainable_variables
        nparams = len(params)
        tape.watch(params)
        _, _, R1s, R2 = aug_lag_vars(z, log_q_z, M.eps, mu, N)
        aug_grad = unbiased_aug_grad(R1s, R2, params, tape)

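        # Reference computation: per-sample tape gradients of T(x)_j - mu_j
        # over the first half of samples, later averaged and contracted with
        # the second-half residual R2 to reproduce the unbiased gradient.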
        T_x_grads = [[[None for _ in range(N // 2)] for _ in range(4)]
                     for _ in range(nparams)]
        T_x = M.eps(z)
        for i in range(N // 2):
            for j in range(4):
                _grads = tape.gradient(T_x[i, j] - mu[j], params)
                for k in range(nparams):
                    T_x_grads[k][j][i] = _grads[k]
    del tape

    # Average across the first half of samples
    for k in range(nparams):
        T_x_grads[k] = np.mean(np.array(T_x_grads[k]), axis=1)

    R2_np = np.mean(T_x[N // 2:, :], 0) - mu
    aug_grad_np = []
    for k in range(nparams):
        aug_grad_np.append(np.tensordot(T_x_grads[k], R2_np, axes=(0, 0)))

    for i in range(nparams):
        assert np.isclose(aug_grad_np[i], aug_grad[i], rtol=1e-3).all()

    return None
Example #3
def test_NormalizingFlow_call():
    D = 4
    num_stages = 1
    num_layers = 2
    num_units = 15
    N = 100
    # Only the coupling architecture is exercised here; the autoregressive
    # variant (with tfp.bijectors.MaskedAutoregressiveFlow as its stage
    # bijector) is commented out of this version of the test.
    arch_types = ["coupling"]
    stage_bijectors = [tfp.bijectors.RealNVP]
    for arch_type, stage_bijector in zip(arch_types, stage_bijectors):
        nf = NormalizingFlow(arch_type, D, num_stages, num_layers, num_units)
        z, _ = nf(N)
        bijectors = nf.trans_dist.bijector.bijectors
        assert type(bijectors[1]) is stage_bijector
        assert type(bijectors[0]) is tfp.bijectors.Chain

        nf = NormalizingFlow(arch_type,
                             D,
                             2,
                             num_layers,
                             num_units,
                             batch_norm=True)
        z, _ = nf(N)
        bijectors = nf.trans_dist.bijector.bijectors
        assert type(bijectors[4]) is stage_bijector
        assert type(bijectors[3]) is tfp.bijectors.ScaleMatvecLU
        assert type(bijectors[2]) is epi.batch_norm.BatchNormalization
        assert type(bijectors[1]) is stage_bijector
        assert type(bijectors[0]) is tfp.bijectors.Chain

        nf = NormalizingFlow(arch_type,
                             D,
                             3,
                             num_layers,
                             num_units,
                             batch_norm=True)
        z, _ = nf(N)
        bijectors = nf.trans_dist.bijector.bijectors
        assert type(bijectors[7]) is stage_bijector
        assert type(bijectors[6]) is tfp.bijectors.ScaleMatvecLU
        assert type(bijectors[5]) is epi.batch_norm.BatchNormalization
        assert type(bijectors[4]) is stage_bijector
        assert type(bijectors[3]) is tfp.bijectors.ScaleMatvecLU
        assert type(bijectors[2]) is epi.batch_norm.BatchNormalization
        assert type(bijectors[1]) is stage_bijector
        assert type(bijectors[0]) is tfp.bijectors.Chain

        x = nf.sample(5)
        assert x.shape[0] == 5
        assert x.shape[1] == D

    return None
Example #4
def test_boundaries():
    D = 2
    lb = -1 * np.ones((D, ))
    ub = 1 * np.ones((D, ))
    nf = NormalizingFlow(
        "coupling",
        D,
        2,
        2,
        25,
        batch_norm=False,
        elemwise_fn="spline",
        num_bins=32,
        bounds=(lb, ub),
        post_affine=False,
        random_seed=2,
    )
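    # bounds=(lb, ub) with the spline element-wise function constrains the
    # flow's support to the box [-1, 1]^2.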
    plot_nf(nf)

    mu = 0.75 * np.ones((D, ))
    Sigma = 0.25 * np.eye(D)
    nf.initialize(mu, Sigma, num_iters=int(5e3), log_rate=200, verbose=True)

    plot_nf(nf)

    return None
Example #5
def test_to_string():
    nf = NormalizingFlow("coupling", 4, 1, 2, 15)
    assert nf.to_string() == "D4_C1_L2_U15_PA_rs1"
    # nf = NormalizingFlow("coupling", 100, 2, 4, 200, random_seed=20)
    # assert nf.to_string() == "D100_C2_L4_U200_bnmom=9.90E-01_PA_rs20"
    # nf = NormalizingFlow("coupling", 4, 1, 2, 15, bn_momentum=0.999, post_affine=False)
    # assert nf.to_string() == "D4_C1_L2_U15_bnmom=9.99E-01_rs1"
    nf = NormalizingFlow("autoregressive",
                         4,
                         1,
                         2,
                         15,
                         batch_norm=False,
                         post_affine=False)
    assert nf.to_string() == "D4_AR1_L2_U15_rs1"
    nf = NormalizingFlow("autoregressive",
                         4,
                         4,
                         2,
                         15,
                         batch_norm=False,
                         post_affine=False)
    assert nf.to_string() == "D4_AR4_L2_U15_rs1"
Example #6
def tf_num_params(N):
    D = int(2 * N * RANK)
    nf = NormalizingFlow(
        D=D,
        arch_type="coupling",
        num_stages=3,
        num_layers=2,
        num_units=100,
        elemwise_fn="affine",
        batch_norm=False,
        bn_momentum=0.0,
        post_affine=True,
        random_seed=1,
    )
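    # A forward pass builds the flow's trainable variables so they can be
    # counted below.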
    x, log_prob = nf(10)
    num_params = 0
    for tf_var in nf.trainable_variables:
        num_params += np.prod(tf_var.shape)
    return num_params
Example #7
def test_epi():
    mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi])

    lb_a12 = 0.0
    ub_a12 = 10.0
    lb_a21 = -10.0
    ub_a21 = 0.0
    a11 = Parameter("a11", 1, 0.0)
    a12 = Parameter("a12", 1, lb_a12, ub_a12)
    a21 = Parameter("a21", 1, lb_a21, ub_a21)
    a22 = Parameter("a22", 1, ub=0.0)
    params = [a11, a12, a21, a22]

    M = Model("lds", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, save_path, _ = M.epi(
        mu, num_iters=100, K=1, save_movie_data=True
    )
    g = q_theta.plot_dist()
    M.epi_opt_movie(save_path)

    params = [a11, a12, a21, a22]
    M = Model("lds_2D", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, save_path, _ = M.epi(
        mu, num_iters=100, K=1, save_movie_data=True
    )

    q_theta = M.load_epi_dist(mu, k=1)

    M.epi_opt_movie(save_path)
    q_theta, opt_data, save_path, _ = M.epi(
        mu, num_units=31, num_iters=100, K=1, save_movie_data=True
    )
    M.plot_epi_hpsearch(mu)

    opt_data_cols = ["k", "iteration", "H", "converged"] + [
        "R%d" % i for i in range(1, M.m + 1)
    ]
    for x, y in zip(opt_data.columns, opt_data_cols):
        assert x == y

    assert q_theta is not None
    with raises(ValueError):
        q_theta = M.load_epi_dist(mu, k=20)
    with raises(TypeError):
        q_theta = M.load_epi_dist(mu, k="foo")
    with raises(ValueError):
        q_theta = M.load_epi_dist(mu, k=-1)

    M = Model("foo", params)
    with raises(ValueError):
        q_theta = M.load_epi_dist(mu, k=-1)

    z = q_theta(1000)
    log_q_z = q_theta.log_prob(z)
    assert np.sum(z[:, 0] < 0.0) == 0
    assert np.sum(z[:, 1] < lb_a12) == 0
    assert np.sum(z[:, 1] > ub_a12) == 0
    assert np.sum(z[:, 2] < lb_a21) == 0
    assert np.sum(z[:, 2] > ub_a21) == 0
    assert np.sum(z[:, 3] > 0.0) == 0
    assert np.sum(1 - np.isfinite(z)) == 0
    assert np.sum(1 - np.isfinite(log_q_z)) == 0

    # Intentionally swap order in list to ensure proper handling.
    params = [a22, a21, a12, a11]
    M = Model("lds2", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, save_path, _ = M.epi(
        mu, K=2, num_iters=100, stop_early=True, verbose=True
    )
    with raises(IOError):
        M.epi_opt_movie(save_path)

    z = q_theta(1000)
    log_q_z = q_theta.log_prob(z)
    assert np.sum(z[:, 0] < 0.0) == 0
    assert np.sum(z[:, 1] < lb_a12) == 0
    assert np.sum(z[:, 1] > ub_a12) == 0
    assert np.sum(z[:, 2] < lb_a21) == 0
    assert np.sum(z[:, 2] > ub_a21) == 0
    assert np.sum(z[:, 3] > 0.0) == 0
    assert np.sum(1 - np.isfinite(z)) == 0
    assert np.sum(1 - np.isfinite(log_q_z)) == 0

    for x, y in zip(opt_data.columns, opt_data_cols):
        assert x == y

    with raises(ValueError):
        def bad_f(a11, a12, a21, a22):
            return tf.expand_dims(a11 + a12 + a21 + a22, 0)

        M.set_eps(bad_f)

    params = [a22, a21, a12, a11]
    M = Model("lds2", params)
    nf = NormalizingFlow("autoregressive", 4, 1, 2, 10)
    al_hps = AugLagHPs()
    with raises(AttributeError):
        save_path = M.get_save_path(mu, nf, al_hps, None)
    save_path = M.get_save_path(mu, nf, al_hps, eps_name="foo")
    return None
Example #8
def test_epi():
    mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi])

    lb_a12 = 0.0
    ub_a12 = 10.0
    lb_a21 = -10.0
    ub_a21 = 0.0
    a11 = Parameter("a11", 1, 0.0)
    a12 = Parameter("a12", 1, lb_a12, ub_a12)
    a21 = Parameter("a21", 1, lb_a21, ub_a21)
    a22 = Parameter("a22", 1, ub=0.0)
    params = [a11, a12, a21, a22]

    M = Model("lds_2D", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, epi_path, failed = M.epi(
        mu, num_iters=100, K=1, save_movie_data=True, log_rate=10,
    )
    z = q_theta(50)
    g = q_theta.plot_dist(z)
    M.epi_opt_movie(epi_path)

    params = [a11, a12, a21, a22]
    # Should load results from the previous EPI run.
    M = Model("lds_2D", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, epi_path, failed = M.epi(
        mu, num_iters=100, K=1, save_movie_data=True
    )

    print("epi_path", epi_path)

    epi_df = M.get_epi_df()
    epi_df_row = epi_df[epi_df["iteration"] == 100].iloc[0]
    q_theta = M.get_epi_dist(epi_df_row)

    M.set_eps(linear2D_freq)
    q_theta, opt_data, epi_path, failed = M.epi(
        mu, num_iters=100, K=1, save_movie_data=True, log_rate=10,
    )
    opt_data_cols = ["k", "iteration", "H", "cost", "converged"] + [
        "R%d" % i for i in range(1, M.m + 1)
    ]
    for x, y in zip(opt_data.columns, opt_data_cols):
        assert x == y

    assert q_theta is not None

    z = q_theta(1000)
    log_q_z = q_theta.log_prob(z)
    assert np.sum(z[:, 0] < 0.0) == 0
    assert np.sum(z[:, 1] < lb_a12) == 0
    assert np.sum(z[:, 1] > ub_a12) == 0
    assert np.sum(z[:, 2] < lb_a21) == 0
    assert np.sum(z[:, 2] > ub_a21) == 0
    assert np.sum(z[:, 3] > 0.0) == 0
    assert np.sum(1 - np.isfinite(z)) == 0

    # Intentionally swap order in list to ensure proper handling.
    params = [a22, a21, a12, a11]
    M = Model("lds", params)
    M.set_eps(linear2D_freq)
    q_theta, opt_data, epi_path, _ = M.epi(
        mu,
        K=2,
        num_iters=100,
        stop_early=True,
        verbose=True,
        save_movie_data=True,
        log_rate=10,
    )
    M.epi_opt_movie(epi_path)

    z = q_theta(1000)
    log_q_z = q_theta.log_prob(z)
    assert np.sum(z[:, 0] < 0.0) == 0
    assert np.sum(z[:, 1] < lb_a12) == 0
    assert np.sum(z[:, 1] > ub_a12) == 0
    assert np.sum(z[:, 2] < lb_a21) == 0
    assert np.sum(z[:, 2] > ub_a21) == 0
    assert np.sum(z[:, 3] > 0.0) == 0
    assert np.sum(1 - np.isfinite(z)) == 0

    print("DOING ABC NOW")
    # Need finite support for ABC
    a11 = Parameter("a11", 1, -10.0, 10.0)
    a12 = Parameter("a12", 1, -10.0, 10.0)
    a21 = Parameter("a21", 1, -10.0, 10.0)
    a22 = Parameter("a22", 1, -10.0, 10.0)
    params = [a11, a12, a21, a22]
    M = Model("lds_2D", params)
    M.set_eps(linear2D_freq)
    init_type = "abc"
    init_params = {"num_keep": 50, "mean": mu[:2], "std": np.sqrt(mu[2:])}

    q_theta, opt_data, epi_path, failed = M.epi(
        mu,
        num_iters=100,
        K=1,
        init_type=init_type,
        init_params=init_params,
        save_movie_data=True,
        log_rate=10,
    )

    params = [a11, a12, a21, a22]
    M = Model("lds2", params)
    M.set_eps(linear2D_freq)
    # This should cause the optimization to fail with NaNs, since c0=1e20 is too high.
    q_theta, opt_data, epi_path, _ = M.epi(
        mu,
        K=3,
        num_iters=1000,
        c0=1e20,
        stop_early=True,
        verbose=True,
        save_movie_data=False,
        log_rate=10,
    )
    with raises(IOError):
        M.epi_opt_movie(epi_path)

    for x, y in zip(opt_data.columns, opt_data_cols):
        assert x == y

    with raises(ValueError):
        def bad_f(a11, a12, a21, a22):
            return tf.expand_dims(a11 + a12 + a21 + a22, 0)

        M.set_eps(bad_f)

    params = [a11, a12, a21, a22]
    M = Model("lds2", params)
    init_params = {"mu": 2 * np.zeros((4,)), "Sigma": np.eye(4)}
    nf = NormalizingFlow("autoregressive", 4, 1, 2, 10)
    al_hps = AugLagHPs()
    epi_path, exists = M.get_epi_path(init_params, nf, mu, al_hps, eps_name="foo")
    assert not exists
    return None
Example #9
def test_initialization():
    D = 4
    nf = NormalizingFlow("coupling",
                         D,
                         2,
                         2,
                         15,
                         batch_norm=False,
                         post_affine=True)
    mu = -0.5 * np.ones((D, ))
    Sigma = 2.0 * np.eye(D)
    nf.initialize(mu, Sigma, num_iters=int(5e3), verbose=True)

    z = nf.sample(int(1e4))
    z = z.numpy()
    mean_z = np.mean(z, 0)
    Sigma_z = np.cov(z.T)
    assert np.isclose(mean_z, mu, atol=0.5).all()
    assert np.isclose(Sigma_z, Sigma, atol=0.5).all()

    # For init load
    nf.initialize(mu, Sigma, verbose=True)

    # Bounds
    lb = np.zeros((D, ))
    ub = 2 * np.ones((D, ))
    nf = NormalizingFlow(
        "autoregressive",
        D,
        2,
        2,
        15,
        batch_norm=True,
        bounds=(lb, ub),
    )
    nf.initialize(mu, Sigma, num_iters=int(5e3), verbose=True)

    return None
Example #10
def test_to_string():
    nf = NormalizingFlow("coupling", 4, 1, 2, 15)
    assert nf.to_string() == "D4_C1_affine_L2_U15_bnmom=0.00E+00_PA_rs1"

    nf = NormalizingFlow(
        "coupling",
        100,
        2,
        4,
        200,
        elemwise_fn="spline",
        batch_norm=False,
        random_seed=20,
    )
    assert nf.to_string() == "D100_C2_spline_L4_U200_bins=4_PA_rs20"

    nf = NormalizingFlow("coupling",
                         4,
                         1,
                         2,
                         15,
                         bn_momentum=0.999,
                         post_affine=False)
    assert nf.to_string() == "D4_C1_affine_L2_U15_bnmom=9.99E-01_rs1"

    nf = NormalizingFlow("autoregressive",
                         4,
                         1,
                         2,
                         15,
                         batch_norm=False,
                         post_affine=False)
    assert nf.to_string() == "D4_AR1_affine_L2_U15_rs1"

    nf = NormalizingFlow("autoregressive",
                         4,
                         4,
                         2,
                         15,
                         batch_norm=False,
                         post_affine=False)
    assert nf.to_string() == "D4_AR4_affine_L2_U15_rs1"
Example #11
def test_NormalizingFlow_init():
    """Test architecture initialization."""
    arch_type = "coupling"
    D = 4
    num_stages = 1
    num_layers = 2
    num_units = 15

    tf.random.set_seed(0)
    np.random.seed(0)

    # Check setters.
    nf = NormalizingFlow(arch_type, D, num_stages, num_layers, num_units)
    assert nf.arch_type == "coupling"
    assert nf.D == D
    assert nf.num_stages == num_stages
    assert nf.num_layers == num_layers
    assert nf.num_units == num_units
    assert nf.batch_norm
    assert nf.post_affine
    assert nf.lb is None
    assert nf.ub is None
    assert nf.random_seed == 1

    # Test autoregressive
    nf = NormalizingFlow("autoregressive", D, num_stages, num_layers,
                         num_units)
    assert nf.arch_type == "autoregressive"

    lb = -2.0 * np.ones((D, ))
    ub = 2.0 * np.ones((D, ))
    bounds = (lb, ub)
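    # Positional arguments after num_units (inferred from keyword usage in the
    # other snippets): elemwise_fn, num_bins, batch_norm, bn_momentum,
    # post_affine, bounds, random_seed.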
    nf = NormalizingFlow(
        arch_type,
        D,
        num_stages,
        num_layers,
        num_units,
        "affine",
        32,
        False,
        None,
        False,
        bounds,
        5,
    )
    assert not nf.batch_norm
    assert not nf.post_affine
    assert np.equal(nf.lb, lb).all()
    assert np.equal(nf.ub, ub).all()
    assert nf.random_seed == 5

    nf = NormalizingFlow(
        arch_type,
        D,
        num_stages,
        num_layers,
        num_units,
        "affine",
        32,
        False,
        None,
        False,
        [lb, ub],
        5,
    )
    assert np.equal(nf.lb, lb).all()
    assert np.equal(nf.ub, ub).all()

    # Test error handling.
    with raises(TypeError):
        nf = NormalizingFlow(0, D, num_stages, num_layers, num_units)
    with raises(ValueError):
        nf = NormalizingFlow("foo", D, num_stages, num_layers, num_units)

    with raises(TypeError):
        nf = NormalizingFlow(arch_type, 2.0, num_stages, num_layers, num_units)
    with raises(ValueError):
        nf = NormalizingFlow(arch_type, 1, num_stages, num_layers, num_units)

    with raises(TypeError):
        nf = NormalizingFlow(arch_type, D, 2.0, num_layers, num_units)
    with raises(ValueError):
        nf = NormalizingFlow(arch_type, D, -1, num_layers, num_units)

    with raises(TypeError):
        nf = NormalizingFlow(arch_type, D, num_stages, 2.0, num_units)
    with raises(ValueError):
        nf = NormalizingFlow(arch_type, D, num_stages, 0, num_units)

    with raises(TypeError):
        nf = NormalizingFlow(arch_type, D, num_stages, num_layers, 2.0)
    with raises(ValueError):
        nf = NormalizingFlow(arch_type, D, num_stages, num_layers, 0)

    with raises(TypeError):
        nf = NormalizingFlow(arch_type,
                             D,
                             num_stages,
                             num_layers,
                             num_units,
                             batch_norm=1.0)

    with raises(TypeError):
        nf = NormalizingFlow(
            arch_type,
            D,
            num_stages,
            num_layers,
            num_units,
            batch_norm=True,
            bn_momentum="foo",
        )
    with raises(TypeError):
        nf = NormalizingFlow(
            arch_type,
            D,
            num_stages,
            num_layers,
            num_units,
            post_affine="foo",
        )

    with raises(ValueError):
        nf = NormalizingFlow(arch_type,
                             D,
                             num_stages,
                             num_layers,
                             num_units,
                             bounds=(lb, ub, ub))
    with raises(TypeError):
        nf = NormalizingFlow(arch_type,
                             D,
                             num_stages,
                             num_layers,
                             num_units,
                             bounds=("foo", "bar"))

    with raises(TypeError):
        nf = NormalizingFlow(arch_type,
                             D,
                             num_stages,
                             num_layers,
                             num_units,
                             bounds="foo")

    with raises(TypeError):
        nf = NormalizingFlow(arch_type,
                             D,
                             num_stages,
                             num_layers,
                             num_units,
                             random_seed=1.0)

    # Check that q0 has correct statistics
    nf = NormalizingFlow(arch_type, D, num_stages, num_layers, num_units)
    z = nf.q0.sample(100000).numpy()
    assert np.isclose(np.mean(z, 0), np.zeros((D, )), atol=1e-2).all()
    assert np.isclose(np.cov(z.T), np.eye(D), atol=1e-1).all()

    return None
Example #12
    def epi(
        self,
        mu,
        arch_type="coupling",
        num_stages=3,
        num_layers=2,
        num_units=None,
        batch_norm=True,
        bn_momentum=0.99,
        post_affine=False,
        random_seed=1,
        init_type=None,
        init_params=None,
        K=10,
        num_iters=1000,
        N=500,
        lr=1e-3,
        c0=1.0,
        gamma=0.25,
        beta=4.0,
        alpha=0.05,
        nu=1.0,
        stop_early=False,
        log_rate=50,
        verbose=False,
        save_movie_data=False,
    ):
        """Runs emergent property inference for this model with mean parameter :math:`\\mu`.


        :param mu: Mean parameter of the emergent property.
        :type mu: np.ndarray
        :param arch_type: :math:`\\in` :obj:`['autoregressive', 'coupling']`, defaults to :obj:`'coupling'`.
        :type arch_type: str, optional
        :param num_stages: Number of coupling or autoregressive stages, defaults to 3.
        :type num_stages: int, optional
        :param num_layers: Number of neural network layers per conditional, defaults to 2.
        :type num_layers: int, optional
        :param num_units: Number of units per layer, defaults to max(2D, 15).
        :type num_units: int, optional
        :param batch_norm: Use batch normalization between stages, defaults to True.
        :type batch_norm: bool, optional
        :param bn_momentum: Batch normalization momentum parameter, defaults to 0.99.
        :type bn_momentum: float, optional
        :param post_affine: Shift and scale following main transform, defaults to False.
        :type post_affine: bool, optional
        :param random_seed: Random seed of architecture parameters, defaults to 1.
        :type random_seed: int, optional
        :param init_type: :math:`\\in` :obj:`['iso_gauss', 'gaussian']`.
        :type init_type: str, optional
        :param init_params: Parameters according to :obj:`init_type`.
        :type init_params: dict, optional
        :param K: Number of augmented Lagrangian iterations, defaults to 10.
        :type K: int, float, optional
        :param num_iters: Number of optimization iterations, defaults to 1000.
        :type num_iters: int, optional
        :param N: Number of batch samples per iteration, defaults to 500.
        :type N: int, optional
        :param lr: Adam optimizer learning rate, defaults to 1e-3.
        :type lr: float, optional
        :param c0: Initial augmented Lagrangian coefficient, defaults to 1.0.
        :type c0: float, optional
        :param gamma: Augmented Lagrangian hyperparameter, defaults to 0.25.
        :type gamma: float, optional
        :param beta: Augmented Lagrangian hyperparameter, defaults to 4.0.
        :type beta: float, optional
        :param alpha: P-value threshold for convergence testing, defaults to 0.05.
        :type alpha: float, optional
        :param nu: Fraction of N used for convergence testing, defaults to 1.0.
        :type nu: float, optional
        :param stop_early: Exit if converged, defaults to False.
        :type stop_early: bool, optional
        :param log_rate: Record optimization data every :obj:`log_rate` iterations, defaults to 50.
        :type log_rate: int, optional
        :param verbose: Print optimization information, defaults to False.
        :type verbose: bool, optional
        :param save_movie_data: Save data for making optimization movie, defaults to False.
        :type save_movie_data: bool, optional
        :returns: q_theta, opt_df, save_path, failed
        :rtype: epi.models.Distribution, pandas.DataFrame, str, bool
        """
        if num_units is None:
            num_units = max(2 * self.D, 15)

        nf = NormalizingFlow(
            arch_type=arch_type,
            D=self.D,
            num_stages=num_stages,
            num_layers=num_layers,
            num_units=num_units,
            batch_norm=batch_norm,
            bn_momentum=bn_momentum,
            post_affine=post_affine,
            bounds=self._get_bounds(),
            random_seed=random_seed,
        )

        # Hyperparameter object
        aug_lag_hps = AugLagHPs(N, lr, c0, gamma, beta)

        # Initialize architecture to gaussian.
        print("Initializing %s architecture." % nf.to_string(), flush=True)
        if init_type is None or init_params is None:
            mu_init = np.zeros((self.D))
            Sigma = np.zeros((self.D, self.D))
            for i in range(self.D):
                if (np.isneginf(nf.lb[i]) and np.isposinf(nf.ub[i])):
                    mu_init[i] = 0.
                    Sigma[i, i] = 1.
                elif (np.isneginf(nf.lb[i])):
                    mu_init[i] = nf.ub[i] - 2.
                    Sigma[i, i] = 1.
                elif (np.isposinf(nf.ub[i])):
                    mu_init[i] = nf.lb[i] + 2.
                    Sigma[i, i] = 1.
                else:
                    mu_init[i] = (nf.lb[i] + nf.ub[i]) / 2.
                    Sigma[i, i] = (nf.ub[i] - nf.lb[i]) / 2.
            init_type = "gaussian"
            init_params = {'mu': mu_init, 'Sigma': Sigma}
        nf.initialize(init_type, init_params)

        # Checkpoint the initialization.
        optimizer = tf.keras.optimizers.Adam(lr)
        ckpt = tf.train.Checkpoint(optimizer=optimizer, model=nf)
        ckpt_dir = self.get_save_path(mu, nf, aug_lag_hps)
        manager = tf.train.CheckpointManager(ckpt,
                                             directory=ckpt_dir,
                                             max_to_keep=None)
        manager.save(checkpoint_number=0)
        print("Saving EPI models to %s." % ckpt_dir, flush=True)

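        # One optimizer step on the augmented Lagrangian objective
        #     cost = -H(q) + eta . R + (c / 2) * ||R||^2,
        # where H is the flow's entropy and R the constraint residuals; the
        # augmented term's gradient uses the unbiased estimator from
        # unbiased_aug_grad.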
        @tf.function
        def train_step(eta, c):
            with tf.GradientTape(persistent=True) as tape:
                z, log_q_z = nf(N)
                params = nf.trainable_variables
                tape.watch(params)
                H, R, R1s, R2 = aug_lag_vars(z, log_q_z, self.eps, mu, N)
                neg_H = -H
                lagrange_dot = tf.reduce_sum(tf.multiply(eta, R))
            aug_l2 = c / 2.0 * tf.reduce_sum(tf.square(R))
            cost = neg_H + lagrange_dot + aug_l2
            H_grad = tape.gradient(neg_H, params)
            lagrange_grad = tape.gradient(lagrange_dot, params)
            aug_grad = unbiased_aug_grad(R1s, R2, params, tape)
            gradients = [
                g1 + g2 + c * g3
                for g1, g2, g3 in zip(H_grad, lagrange_grad, aug_grad)
            ]
            optimizer.apply_gradients(zip(gradients, params))
            return cost, H, R, z, log_q_z

        @tf.function
        def two_dim_T_x_batch(nf, eps, M, N, m):
            z, _ = nf(M * N)
            T_x = eps(z)
            T_x = tf.reshape(T_x, (M, N, m))
            return T_x

        @tf.function
        def get_R_norm_dist(nf, eps, mu, M, N):
            m = mu.shape[1]
            T_x = two_dim_T_x_batch(nf, eps, M, N, m)
            return tf.reduce_sum(tf.square(tf.reduce_mean(T_x, axis=1) - mu),
                                 axis=1)

        @tf.function
        def get_R_mean_dist(nf, eps, mu, M, N):
            m = mu.shape[1]
            T_x = two_dim_T_x_batch(nf, eps, M, N, m)
            return tf.reduce_mean(T_x, axis=1) - mu

        M_test = 200
        N_test = int(nu * N)
        M_norm = 200
        # Initialize augmented Lagrangian parameters eta and c.
        eta, c = np.zeros((self.m, ), np.float32), c0
        etas, cs = np.zeros((K, self.m)), np.zeros((K, ))

        # Initialize optimization data frame.
        z, log_q_z = nf(N)
        H_0, R_0, _, _ = aug_lag_vars(z, log_q_z, self.eps, mu, N)
        cost_0 = -H_0 + np.dot(eta, R_0) + c0 / 2.0 * np.sum(np.square(R_0))
        R_keys = ["R%d" % (i + 1) for i in range(self.m)]
        opt_it_dfs = [self._opt_it_df(0, 0, H_0.numpy(), R_0.numpy(), R_keys)]

        # Record samples for movie.
        if save_movie_data:
            N_save = 200
            zs = [z.numpy()[:N_save, :]]
            log_q_zs = [log_q_z.numpy()[:N_save]]

        # Measure initial R norm distribution.
        mu_colvec = np_column_vec(mu).astype(np.float32).T
        norms = get_R_norm_dist(nf, self.eps, mu_colvec, M_norm, N)

        # EPI optimization
        print(format_opt_msg(0, 0, cost_0, H_0, R_0), flush=True)
        failed = False
        for k in range(1, K + 1):
            etas[k - 1], cs[k - 1] = eta, c  # Record multipliers for this aug-lag iteration.
            for i in range(1, num_iters + 1):
                time1 = time.time()
                cost, H, R, z, log_q_z = train_step(eta, c)
                time2 = time.time()
                if i % log_rate == 0:
                    if verbose:
                        print(format_opt_msg(k, i, cost, H, R), flush=True)
                    iteration = (k - 1) * num_iters + i
                    opt_it_dfs.append(
                        self._opt_it_df(k, iteration, H.numpy(), R.numpy(), R_keys))
                    if save_movie_data:
                        zs.append(z.numpy()[:N_save, :])
                        log_q_zs.append(log_q_z.numpy()[:N_save])
                if np.isnan(cost):
                    failed = True
                    break
            if not verbose:
                print(format_opt_msg(k, i, cost, H, R), flush=True)

            # Save epi optimization data following aug lag iteration k.
            opt_it_df = pd.concat(opt_it_dfs, ignore_index=True)
            manager.save(checkpoint_number=k)

            if failed:
                converged = False
            else:
                R_means = get_R_mean_dist(nf, self.eps, mu_colvec, M_test,
                                          N_test)
                converged = self.test_convergence(R_means.numpy(), alpha)
            last_ind = opt_it_df["iteration"] == k * num_iters

            opt_it_df.loc[last_ind, "converged"] = converged
            self._save_epi_opt(ckpt_dir, opt_it_df, cs, etas)
            opt_it_dfs = [opt_it_df]

            if k < K:
                if np.isnan(cost):
                    break
                # Check for convergence if early stopping.
                if stop_early and converged:
                    break

                # Update eta and c
                eta = eta + c * R
                norms_k = get_R_norm_dist(nf, self.eps, mu_colvec, M_norm, N)
                t, p = ttest_ind(norms_k.numpy(),
                                 gamma * norms.numpy(),
                                 equal_var=False)
                u = np.random.rand(1)
                if u < 1 - p / 2.0 and t > 0.0:
                    c = beta * c
                norms = norms_k

        time_per_it = time2 - time1
        if save_movie_data:
            np.savez(
                ckpt_dir + "movie_data.npz",
                zs=np.array(zs),
                log_q_zs=np.array(log_q_zs),
                time_per_it=time_per_it,
                iterations=np.arange(0, k * num_iters + 1, log_rate),
            )
        else:
            np.savez(
                ckpt_dir + "timing.npz",
                time_per_it=time_per_it,
            )

        # Save hyperparameters.
        self._save_hps(ckpt_dir, nf, aug_lag_hps, init_type, init_params)

        # Return optimized distribution.
        q_theta = Distribution(nf, self.parameters)

        return q_theta, opt_it_dfs[0], ckpt_dir, failed
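
A minimal usage sketch of this method, mirroring the calls in Examples #7 and #8 above (the Parameter and Model setup and linear2D_freq come from those snippets; the hyperparameter values here are illustrative, not prescribed):

# Hypothetical call, following the patterns of the test snippets above.
mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi])
params = [
    Parameter("a11", 1, 0.0),        # a11 > 0
    Parameter("a12", 1, 0.0, 10.0),  # 0 < a12 < 10
    Parameter("a21", 1, -10.0, 0.0), # -10 < a21 < 0
    Parameter("a22", 1, ub=0.0),     # a22 < 0
]
M = Model("lds_2D", params)
M.set_eps(linear2D_freq)
q_theta, opt_df, save_path, failed = M.epi(mu, K=5, num_iters=500, verbose=True)
z = q_theta(1000)              # samples satisfying the emergent property
log_q_z = q_theta.log_prob(z)  # their log densities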
Example #13
    def load_epi_dist(
        self,
        mu,
        k=None,
        alpha=None,
        nu=0.1,
        arch_type="coupling",
        num_stages=3,
        num_layers=2,
        num_units=None,
        batch_norm=True,
        bn_momentum=0.99,
        post_affine=False,
        random_seed=1,
        init_type="iso_gauss",
        init_params={
            "loc": 0.0,
            "scale": 1.0
        },
        N=500,
        lr=1e-3,
        c0=1.0,
        gamma=0.25,
        beta=4.0,
    ):

        if k is not None:
            if type(k) is not int:
                raise TypeError(
                    format_type_err_msg("Model.load_epi_dist", "k", k, int))
            if k < 0:
                raise ValueError(
                    "k must be augmented Lagrangian iteration index.")

        if num_units is None:
            num_units = max(2 * self.D, 15)

        nf = NormalizingFlow(
            arch_type=arch_type,
            D=self.D,
            num_stages=num_stages,
            num_layers=num_layers,
            num_units=num_units,
            batch_norm=batch_norm,
            bn_momentum=bn_momentum,
            post_affine=post_affine,
            bounds=self._get_bounds(),
            random_seed=random_seed,
        )

        aug_lag_hps = AugLagHPs(N, lr, c0, gamma, beta)
        optimizer = tf.keras.optimizers.Adam(lr)
        checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=nf)
        ckpt_dir = self.get_save_path(mu, nf, aug_lag_hps)
        ckpt_state = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt_state is not None:
            ckpts = ckpt_state.all_model_checkpoint_paths
        else:
            raise ValueError("No checkpoints found.")

        if k is not None:
            if k >= len(ckpts):
                raise ValueError("Index of checkpoint 'k' too large.")
            status = checkpoint.restore(ckpts[k])
            status.expect_partial()
            q_theta = Distribution(nf, self.parameters)
            return q_theta
Example #14
def test_initialization():
    D = 4
    nf = NormalizingFlow("autoregressive",
                         D,
                         2,
                         2,
                         15,
                         batch_norm=True,
                         post_affine=True)
    init_type = "iso_gauss"
    loc = -0.5
    scale = 2.0
    init_params = {"loc": loc, "scale": scale}
    nf.initialize(init_type, init_params, verbose=True)
    nf.plot_init_opt(init_type, init_params)

    z = nf.sample(int(1e4))
    z = z.numpy()
    mean_z = np.mean(z, 0)
    Sigma_z = np.cov(z.T)
    assert np.isclose(mean_z, loc * np.ones((D, )), atol=1e-1).all()
    assert np.isclose(Sigma_z, scale * np.eye(D), atol=1e-1).all()

    # For init load
    nf.initialize(init_type, init_params)

    # Bounds
    lb = np.zeros((D, ))
    ub = np.ones((D, ))
    nf = NormalizingFlow("autoregressive",
                         D,
                         2,
                         2,
                         15,
                         batch_norm=True,
                         bounds=(lb, ub))
    nf.initialize(init_type, init_params)

    return None