Example #1
def test_unbiased_aug_grad():
    # Test using linear 2D system eps
    N = 100
    z = np.random.normal(0.0, 1.0, (N, 4)).astype(DTYPE)
    log_q_z = np.random.normal(2.0, 3.0, (N, )).astype(DTYPE)
    mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi]).astype(DTYPE)

    lb = np.NINF
    ub = np.PINF
    a11 = Parameter("a11", 1, lb, ub)
    a12 = Parameter("a12", 1, lb, ub)
    a21 = Parameter("a21", 1, lb, ub)
    a22 = Parameter("a22", 1, lb, ub)
    params = [a11, a12, a21, a22]
    M = Model("lds", params)
    M.set_eps(linear2D_freq)

    nf = NormalizingFlow(arch_type="autoregressive",
                         D=4,
                         num_stages=1,
                         num_layers=2,
                         num_units=15)

    with tf.GradientTape(persistent=True) as tape:
        z, log_q_z = nf(N)
        params = nf.trainable_variables
        nparams = len(params)
        tape.watch(params)
        _, _, R1s, R2 = aug_lag_vars(z, log_q_z, M.eps, mu, N)
        aug_grad = unbiased_aug_grad(R1s, R2, params, tape)

        T_x_grads = [[[None for i in range(N // 2)] for i in range(4)]
                     for i in range(nparams)]
        T_x = M.eps(z)
        for i in range(N // 2):
            for j in range(4):
                _grads = tape.gradient(T_x[i, j] - mu[j], params)
                for k in range(nparams):
                    T_x_grads[k][j][i] = _grads[k]
    del tape

    # Average across the first half of samples
    for k in range(nparams):
        T_x_grads[k] = np.mean(np.array(T_x_grads[k]), axis=1)

    R2_np = np.mean(T_x[N // 2:, :], 0) - mu
    aug_grad_np = []
    for k in range(nparams):
        aug_grad_np.append(np.tensordot(T_x_grads[k], R2_np, axes=(0, 0)))

    for i in range(nparams):
        assert np.isclose(aug_grad_np[i], aug_grad[i], rtol=1e-3).all()

    return None
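For reference, the quantity reconstructed in numpy above is the split-sample estimator of the augmented penalty gradient: per-constraint residual gradients from the first half of the batch are contracted with residuals from the second half, so the two factors are independent and their product is unbiased. A minimal numpy sketch of that contraction follows; the function name and shapes are illustrative, not part of the epi API.

import numpy as np

def split_sample_penalty_grad(R1_grads, R2):
    # R1_grads: gradients of each constraint residual, computed on the first
    #           half of the batch, stacked to shape (m, *param_shape).
    # R2: constraint residuals from the second half of the batch, shape (m,).
    # Returns sum_j d(R1_j)/d(theta) * R2_j, i.e. the test's tensordot.
    return np.tensordot(np.asarray(R1_grads), np.asarray(R2), axes=(0, 0))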
Example #2
def test_aug_lag_vars():
    # Test using linear 2D system eps
    N = 100
    z = np.random.normal(0.0, 1.0, (N, 4)).astype(DTYPE)
    log_q_z = np.random.normal(2.0, 3.0, (N, )).astype(DTYPE)
    mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi]).astype(DTYPE)

    lb = np.NINF
    ub = np.PINF
    a11 = Parameter("a11", 1, lb, ub)
    a12 = Parameter("a12", 1, lb, ub)
    a21 = Parameter("a21", 1, lb, ub)
    a22 = Parameter("a22", 1, lb, ub)
    params = [a11, a12, a21, a22]
    M = Model("lds", params)
    M.set_eps(linear2D_freq)

    H, R, R1s, R2 = aug_lag_vars(z, log_q_z, M.eps, mu, N)

    alphas = np.zeros((N, ))
    omegas = np.zeros((N, ))
    for i in range(N):
        alphas[i], omegas[i] = linear2D_freq_np(z[i, 0], z[i, 1], z[i, 2],
                                                z[i, 3])

    # mean_alphas = np.mean(alphas)
    # mean_omegas = np.mean(omegas)
    mean_alphas = 0.0
    mean_omegas = 2.0 * np.pi

    T_x_np = np.stack(
        (
            alphas,
            np.square(alphas - mean_alphas),
            omegas,
            np.square(omegas - mean_omegas),
        ),
        axis=1,
    )

    H_np = np.mean(-log_q_z)
    R_np = np.mean(T_x_np, 0) - mu
    R1_np = np.mean(T_x_np[:N // 2, :], 0) - mu
    R2_np = np.mean(T_x_np[N // 2:, :], 0) - mu
    R1s_np = list(R1_np)

    rtol = 1e-3
    assert np.isclose(H, H_np, rtol=rtol)
    assert np.isclose(R, R_np, rtol=rtol).all()
    assert np.isclose(R1s, R1s_np, rtol=rtol).all()
    assert np.isclose(R2, R2_np, rtol=rtol).all()

    return None
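The four quantities checked here are the pieces of the augmented Lagrangian objective used in Examples #3 and #4: H is the entropy estimate mean(-log q(z)), R is the full-batch constraint residual mean(T(x)) - mu, and R1s/R2 are the same residual computed on each half of the batch. A hedged numpy sketch of how H and R combine into that objective; the function name is illustrative.

import numpy as np

def aug_lag_cost(H, R, eta, c):
    # Maximize entropy subject to E[T(x)] = mu by minimizing
    # -H + eta . R + (c / 2) * ||R||^2, as in the train_step below.
    return -H + np.dot(eta, R) + 0.5 * c * np.sum(np.square(R))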
Example #3
def train_step(eta, c):
    with tf.GradientTape(persistent=True) as tape:
        z, log_q_z = nf(N)
        params = nf.trainable_variables
        tape.watch(params)
        H, R, R1s, R2 = aug_lag_vars(z, log_q_z, self.eps, mu, N)
        neg_H = -H
        lagrange_dot = tf.reduce_sum(tf.multiply(eta, R))
    aug_l2 = c / 2.0 * tf.reduce_sum(tf.square(R))
    cost = neg_H + lagrange_dot + aug_l2
    H_grad = tape.gradient(neg_H, params)
    lagrange_grad = tape.gradient(lagrange_dot, params)
    aug_grad = unbiased_aug_grad(R1s, R2, params, tape)
    gradients = [
        g1 + g2 + c * g3
        for g1, g2, g3 in zip(H_grad, lagrange_grad, aug_grad)
    ]
    optimizer.apply_gradients(zip(gradients, params))
    return cost, H, R, z, log_q_z
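train_step is the inner loop of an augmented Lagrangian scheme; Example #4 wraps it in an outer loop that updates the multipliers eta and the penalty coefficient c after each round of iterations. A condensed, hedged sketch of that outer update, with the hypothesis test on the constraint norms from Example #4 replaced by a plain comparison and illustrative names:

import numpy as np

def update_eta_c(eta, c, R, norm_prev, norm_curr, gamma=0.25, beta=4.0):
    # Dual ascent on the multipliers: eta <- eta + c * R.
    eta = eta + c * R
    # If the mean constraint violation has not dropped below gamma times its
    # previous value, make the quadratic penalty stiffer.
    if norm_curr > gamma * norm_prev:
        c = beta * c
    return eta, c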
Example #4
    def epi(
        self,
        mu,
        arch_type="coupling",
        num_stages=3,
        num_layers=2,
        num_units=None,
        batch_norm=True,
        bn_momentum=0.99,
        post_affine=False,
        random_seed=1,
        init_type=None,  #"iso_gauss",
        init_params=None,  #{"loc": 0.0, "scale": 1.0},
        K=10,
        num_iters=1000,
        N=500,
        lr=1e-3,
        c0=1.0,
        gamma=0.25,
        beta=4.0,
        alpha=0.05,
        nu=1.0,
        stop_early=False,
        log_rate=50,
        verbose=False,
        save_movie_data=False,
    ):
        """Runs emergent property inference for this model with mean parameter :math:`\\mu`.


        :param mu: Mean parameter of the emergent property.
        :type mu: np.ndarray
        :param arch_type: :math:`\\in` :obj:`['autoregressive', 'coupling']`, defaults to :obj:`'coupling'`.
        :type arch_type: str, optional
        :param num_stages: Number of coupling or autoregressive stages, defaults to 3.
        :type num_stages: int, optional
        :param num_layers: Number of neural network layers per conditional, defaults to 2.
        :type num_layers: int, optional
        :param num_units: Number of units per layer, defaults to max(2D, 15).
        :type num_units: int, optional
        :param batch_norm: Use batch normalization between stages, defaults to True.
        :type batch_norm: bool, optional
        :param bn_momentum: Batch normalization momentum parameter, defaults to 0.99.
        :type bn_momentum: float, optional
        :param post_affine: Shift and scale following main transform, defaults to False.
        :type post_affine: bool, optional
        :param random_seed: Random seed of architecture parameters, defaults to 1.
        :type random_seed: int, optional
        :param init_type: :math:`\\in` :obj:`['iso_gauss', 'gaussian']`.
        :type init_type: str, optional
        :param init_params: Parameters according to :obj:`init_type`.
        :type init_params: dict, optional
        :param K: Number of augmented Lagrangian iterations, defaults to 10.
        :type K: int, optional
        :param num_iters: Number of optimization iterations, defaults to 1000.
        :type num_iters: int, optional
        :param N: Number of batch samples per iteration, defaults to 500.
        :type N: int, optional
        :param lr: Adam optimizer learning rate, defaults to 1e-3.
        :type lr: float, optional
        :param c0: Initial augmented Lagrangian coefficient, defaults to 1.0.
        :type c0: float, optional
        :param gamma: Augmented Lagrangian hyperparameter (required reduction factor for the constraint violation), defaults to 0.25.
        :type gamma: float, optional
        :param beta: Augmented Lagrangian hyperparameter (penalty coefficient multiplier), defaults to 4.0.
        :type beta: float, optional
        :param alpha: P-value threshold for convergence testing, defaults to 0.05.
        :type alpha: float, optional
        :param nu: Fraction of N for convergence testing, defaults to 1.0.
        :type nu: float, optional
        :param stop_early: Exit if converged, defaults to False.
        :type stop_early: bool, optional
        :param log_rate: Record optimization data every :obj:`log_rate` iterations, defaults to 50.
        :type log_rate: int, optional
        :param verbose: Print optimization information, defaults to False.
        :type verbose: bool, optional
        :param save_movie_data: Save data for making optimization movie, defaults to False.
        :type save_movie_data: bool, optional
        :returns: q_theta, opt_df, save_path, failed
        :rtype: epi.models.Distribution, pandas.DataFrame, str, bool
        """
        if num_units is None:
            num_units = max(2 * self.D, 15)

        nf = NormalizingFlow(
            arch_type=arch_type,
            D=self.D,
            num_stages=num_stages,
            num_layers=num_layers,
            num_units=num_units,
            batch_norm=batch_norm,
            bn_momentum=bn_momentum,
            post_affine=post_affine,
            bounds=self._get_bounds(),
            random_seed=random_seed,
        )

        # Hyperparameter object
        aug_lag_hps = AugLagHPs(N, lr, c0, gamma, beta)

        # Initialize architecture to gaussian.
        print("Initializing %s architecture." % nf.to_string(), flush=True)
        if init_type is None or init_params is None:
            mu_init = np.zeros((self.D))
            Sigma = np.zeros((self.D, self.D))
            for i in range(self.D):
                if (np.isneginf(nf.lb[i]) and np.isposinf(nf.ub[i])):
                    mu_init[i] = 0.
                    Sigma[i, i] = 1.
                elif (np.isneginf(nf.lb[i])):
                    mu_init[i] = nf.ub[i] - 2.
                    Sigma[i, i] = 1.
                elif (np.isposinf(nf.ub[i])):
                    mu_init[i] = nf.lb[i] + 2.
                    Sigma[i, i] = 1.
                else:
                    mu_init[i] = (nf.lb[i] + nf.ub[i]) / 2.
                    Sigma[i, i] = (nf.ub[i] - nf.lb[i]) / 2.
            init_type = "gaussian"
            init_params = {'mu': mu_init, 'Sigma': Sigma}
        nf.initialize(init_type, init_params)

        # Checkpoint the initialization.
        optimizer = tf.keras.optimizers.Adam(lr)
        ckpt = tf.train.Checkpoint(optimizer=optimizer, model=nf)
        ckpt_dir = self.get_save_path(mu, nf, aug_lag_hps)
        manager = tf.train.CheckpointManager(ckpt,
                                             directory=ckpt_dir,
                                             max_to_keep=None)
        manager.save(checkpoint_number=0)
        print("Saving EPI models to %s." % ckpt_dir, flush=True)

        @tf.function
        def train_step(eta, c):
            with tf.GradientTape(persistent=True) as tape:
                z, log_q_z = nf(N)
                params = nf.trainable_variables
                tape.watch(params)
                H, R, R1s, R2 = aug_lag_vars(z, log_q_z, self.eps, mu, N)
                neg_H = -H
                lagrange_dot = tf.reduce_sum(tf.multiply(eta, R))
            aug_l2 = c / 2.0 * tf.reduce_sum(tf.square(R))
            cost = neg_H + lagrange_dot + aug_l2
            H_grad = tape.gradient(neg_H, params)
            lagrange_grad = tape.gradient(lagrange_dot, params)
            aug_grad = unbiased_aug_grad(R1s, R2, params, tape)
            gradients = [
                g1 + g2 + c * g3
                for g1, g2, g3 in zip(H_grad, lagrange_grad, aug_grad)
            ]
            optimizer.apply_gradients(zip(gradients, params))
            return cost, H, R, z, log_q_z

        @tf.function
        def two_dim_T_x_batch(nf, eps, M, N, m):
            z, _ = nf(M * N)
            T_x = eps(z)
            T_x = tf.reshape(T_x, (M, N, m))
            return T_x

        @tf.function
        def get_R_norm_dist(nf, eps, mu, M, N):
            m = mu.shape[1]
            T_x = two_dim_T_x_batch(nf, eps, M, N, m)
            return tf.reduce_sum(tf.square(tf.reduce_mean(T_x, axis=1) - mu),
                                 axis=1)

        @tf.function
        def get_R_mean_dist(nf, eps, mu, M, N):
            m = mu.shape[1]
            T_x = two_dim_T_x_batch(nf, eps, M, N, m)
            return tf.reduce_mean(T_x, axis=1) - mu

        M_test = 200
        N_test = int(nu * N)
        M_norm = 200
        # Initialize augmented Lagrangian parameters eta and c.
        eta, c = np.zeros((self.m, ), np.float32), c0
        etas, cs = np.zeros((K, self.m)), np.zeros((K, ))

        # Initialize optimization data frame.
        z, log_q_z = nf(N)
        H_0, R_0, _, _ = aug_lag_vars(z, log_q_z, self.eps, mu, N)
        cost_0 = -H_0 + np.dot(eta, R_0) + np.sum(np.square(R_0))
        R_keys = ["R%d" % (i + 1) for i in range(self.m)]
        opt_it_dfs = [self._opt_it_df(0, 0, H_0.numpy(), R_0.numpy(), R_keys)]

        # Record samples for movie.
        if save_movie_data:
            N_save = 200
            zs = [z.numpy()[:N_save, :]]
            log_q_zs = [log_q_z.numpy()[:N_save]]

        # Measure initial R norm distribution.
        mu_colvec = np_column_vec(mu).astype(np.float32).T
        norms = get_R_norm_dist(nf, self.eps, mu_colvec, M_norm, N)

        # EPI optimization
        print(format_opt_msg(0, 0, cost_0, H_0, R_0), flush=True)
        failed = False
        for k in range(1, K + 1):
            etas[k - 1], cs[k - 1] = eta, c
            for i in range(1, num_iters + 1):
                time1 = time.time()
                cost, H, R, z, log_q_z = train_step(eta, c)
                time2 = time.time()
                if i % log_rate == 0:
                    if verbose:
                        print(format_opt_msg(k, i, cost, H, R), flush=True)
                    iter = (k - 1) * num_iters + i
                    opt_it_dfs.append(
                        self._opt_it_df(k, iter, H.numpy(), R.numpy(), R_keys))
                    if save_movie_data:
                        zs.append(z.numpy()[:N_save, :])
                        log_q_zs.append(log_q_z.numpy()[:N_save])
                if np.isnan(cost):
                    failed = True
                    break
            if not verbose:
                print(format_opt_msg(k, i, cost, H, R), flush=True)

            # Save epi optimization data following aug lag iteration k.
            opt_it_df = pd.concat(opt_it_dfs, ignore_index=True)
            manager.save(checkpoint_number=k)

            if failed:
                converged = False
            else:
                R_means = get_R_mean_dist(nf, self.eps, mu_colvec, M_test,
                                          N_test)
                converged = self.test_convergence(R_means.numpy(), alpha)
            last_ind = opt_it_df["iteration"] == k * num_iters

            opt_it_df.loc[last_ind, "converged"] = converged
            self._save_epi_opt(ckpt_dir, opt_it_df, cs, etas)
            opt_it_dfs = [opt_it_df]

            if k < K:
                if np.isnan(cost):
                    break
                # Check for convergence if early stopping.
                if stop_early and converged:
                    break

                # Update eta and c
                eta = eta + c * R
                norms_k = get_R_norm_dist(nf, self.eps, mu_colvec, M_norm, N)
                t, p = ttest_ind(norms_k.numpy(),
                                 gamma * norms.numpy(),
                                 equal_var=False)
                u = np.random.rand(1)
                if u < 1 - p / 2.0 and t > 0.0:
                    c = beta * c
                norms = norms_k

        time_per_it = time2 - time1
        if save_movie_data:
            np.savez(
                ckpt_dir + "movie_data.npz",
                zs=np.array(zs),
                log_q_zs=np.array(log_q_zs),
                time_per_it=time_per_it,
                iterations=np.arange(0, k * num_iters + 1, log_rate),
            )
        else:
            np.savez(
                ckpt_dir + "timing.npz",
                time_per_it=time_per_it,
            )

        # Save hyperparameters.
        self._save_hps(ckpt_dir, nf, aug_lag_hps, init_type, init_params)

        # Return optimized distribution.
        q_theta = Distribution(nf, self.parameters)

        return q_theta, opt_it_dfs[0], ckpt_dir, failed
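
A hedged usage sketch of epi(), reusing the linear 2D system model from the tests above; it assumes the same imports as those tests (Parameter, Model, linear2D_freq, numpy as np), and the keyword values are illustrative.

# Build the model as in Examples #1 and #2.
lb, ub = np.NINF, np.PINF
params = [Parameter(name, 1, lb, ub) for name in ("a11", "a12", "a21", "a22")]
M = Model("lds", params)
M.set_eps(linear2D_freq)

# Mean parameter of the emergent property (values taken from the tests above).
mu = np.array([0.0, 0.1, 2 * np.pi, 0.1 * np.pi], dtype=np.float32)

# Run EPI; returns the optimized distribution, the optimization log,
# the checkpoint directory, and a failure flag.
q_theta, opt_df, ckpt_dir, failed = M.epi(mu, K=5, num_iters=500, verbose=True)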