Example #1
    def test_multivariate_normal_diag(self):
        with self.test_session() as sess:
            N, D, w_true, X_train, y_train, X, w, b, y = self._setup()

            # INFERENCE. Initialize sigmas at identity to verify if we
            # learned an approximately zero determinant.
            qw = MultivariateNormalDiag(mu=tf.Variable(tf.random_normal([D])),
                                        diag_stdev=tf.Variable(tf.ones(D)))
            qb = MultivariateNormalDiag(mu=tf.Variable(tf.random_normal([1])),
                                        diag_stdev=tf.Variable(tf.ones(1)))

            inference = ed.Laplace({
                w: qw,
                b: qb
            },
                                   data={
                                       X: X_train,
                                       y: y_train
                                   })
            inference.run(n_iter=100)

            self._test(sess, qw, qb, w_true)
            self.assertAllClose(qw.sigma.eval(),
                                tf.diag(tf.diag_part(qw.sigma)).eval())
            self.assertAllClose(qb.sigma.eval(),
                                tf.diag(tf.diag_part(qb.sigma)).eval())
Example #2
 def klqp(self, docs, S, T, wordVec):
     K = self.K
     D = self.D
     nu = self.nu
     self.latent_vars = latent_vars = {}
     training_data = {}
     qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
                  scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
     latent_vars[self.mu] = qmu
     qsigmasq = InverseGamma(tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))),
                             tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
     latent_vars[self.sigmasq] = qsigmasq
     for d in range(D):
         training_data[self.w[d]] = docs[d]
     self.qmu = qmu
     self.qsigma = qsigma = tf.sqrt(qsigmasq)
     self.qw = MultivariateNormalDiag(loc=qmu, scale_diag=qsigma)
     V = len(wordVec)
     logprobs = [None] * V
     for i in range(V):
         logprobs[i] = self.qw.log_prob(wordVec[i])
     self.qbeta = tf.convert_to_tensor(logprobs)
     self.inference = ed.KLqp(latent_vars, data=training_data)
     self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
     self.__run_inference__(T)
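As a rough standalone illustration (SciPy rather than Edward, with invented shapes), qw.log_prob(wordVec[i]) in the loop above evaluates the log-density of one word embedding under a diagonal Gaussian:

import numpy as np
from scipy.stats import multivariate_normal

loc = np.zeros(4)             # stands in for one row of qmu
scale_diag = np.ones(4)       # stands in for one row of qsigma
word_vec = np.random.rand(4)  # stands in for one entry of wordVec
log_prob = multivariate_normal.logpdf(word_vec, mean=loc, cov=np.diag(scale_diag ** 2))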
Example #3
def test_mvn_same_as_edward_mvn():
    loc = np.zeros(5)
    scale = np.ones(5)

    A = mvn.mvn(loc=loc, scale=scale)
    B = MultivariateNormalDiag(loc=loc, scale_diag=scale)

    M = np.random.rand(5, 5)
    tf.InteractiveSession()

    assert (abs(tf.reduce_sum(A.log_prob(M)).eval() -
                tf.reduce_sum(B.log_prob(M)).eval()) < 1e-6)
Example #4
def test_mvn_same_as_edward_log_prob():
    loc = np.zeros(5)
    scale = np.ones(5)

    A = mvn.mvn(loc=loc, scale=scale)
    B = MultivariateNormalDiag(loc=loc, scale_diag=scale)
    samples = np.random.rand(5, 5)
    tf.InteractiveSession()

    print('Log probability of Multivariate Normal Scipy vs Edward')
    print_err(
        tf.reduce_sum(A.log_prob(samples)).eval(),
        tf.reduce_sum(B.log_prob(samples)).eval())
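The comparison above leans on the closed-form log-density of a diagonal Gaussian, log N(x; mu, diag(s**2)) = -0.5 * sum((x - mu)**2 / s**2 + log(2*pi*s**2)). A hedged NumPy check of that formula against SciPy (standalone, made-up values):

import numpy as np
from scipy.stats import multivariate_normal

loc, scale = np.zeros(5), np.ones(5)
x = np.random.rand(5)
manual = -0.5 * np.sum((x - loc) ** 2 / scale ** 2 + np.log(2 * np.pi * scale ** 2))
reference = multivariate_normal.logpdf(x, mean=loc, cov=np.diag(scale ** 2))
assert abs(manual - reference) < 1e-6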
Example #5
def create_target_dist():
    """Create and return target distribution."""
    if FLAGS.dist != 'normal':
        raise NotImplementedError

    pi = np.random.dirichlet([1.] * K)
    #pi = pi[np.newaxis, :].astype(np.float32)

    #mus = 2.*np.random.rand(K, D).astype(np.float32) - 1.
    #stds = np.random.rand(K, D).astype(np.float32)

    mus = np.random.randn(K, D).astype(np.float32)
    stds = softplus(np.random.randn(K, D).astype(np.float32))

    pcomps = [
        MultivariateNormalDiag(loc=tf.convert_to_tensor(mus[i],
                                                        dtype=tf.float32),
                               scale_diag=tf.convert_to_tensor(
                                   stds[i], dtype=tf.float32))
        for i in range(K)
    ]
    p = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(pi, dtype=tf.float32)),
        components=pcomps)
    #q = VectorLaplaceDiag(loc=mus[0], scale_diag=stds[0])
    return p, mus, stds
Example #6
 def __init__(self, K, D, N, nu, use_param=False):
     self.K = K  # number of topics
     self.D = D  # number of documents
     self.N = N  # number of words of each document
     self.nu = nu
     self.alpha = alpha = tf.zeros([K]) + 0.1
     self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
     self.sigma = sigma = tf.sqrt(self.sigmasq)
     self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
     self.theta = theta = [None] * D
     self.z = z = [None] * D
     self.w = w = [None] * D
     for d in range(D):
         theta[d] = Dirichlet(alpha)
         if use_param:
             w[d] = ParamMixture(mixing_weights=theta[d],
                                 component_params={
                                     'loc': mu,
                                     'scale_diag': sigma
                                 },
                                 component_dist=MultivariateNormalDiag,
                                 sample_shape=N[d])
             z[d] = w[d].cat
         else:
             z[d] = Categorical(probs=theta[d], sample_shape=N[d])
             components = [
                 MultivariateNormalDiag(loc=tf.gather(mu, k),
                                        scale_diag=tf.gather(self.sigma, k),
                                        sample_shape=N[d]) for k in range(K)
             ]
             w[d] = Mixture(cat=z[d],
                            components=components,
                            sample_shape=N[d])
Example #7
def get_tf_mixture(locs, diags, weights):
    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    return Mixture(cat=cat, components=q_comps)
Example #8
def main():
    # build model
    xcomps = [
        Normal(loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
               scale=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]
    x = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(mixture_model_relbo.pi)),
        components=xcomps,
        sample_shape=mixture_model_relbo.N)

    x_mvns = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
            scale_diag=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]

    x_train, components = mixture_model_relbo.build_toy_dataset(
        mixture_model_relbo.N)
    n_examples, n_features = x_train.shape
    qxs = [
        MultivariateNormalDiag(loc=[scipy.stats.norm.rvs(1)],
                               scale_diag=[scipy.stats.norm.rvs(1)])
        for i in range(10)
    ]

    truth = [
        MultivariateNormalDiag(loc=mixture_model_relbo.mus[i],
                               scale_diag=mixture_model_relbo.stds[i])
        for i in range(len(mixture_model_relbo.mus))
    ]
    qxs.extend(truth)

    mix = Mixture(cat=Categorical(probs=[1. / len(qxs)] * len(qxs)),
                  components=qxs)

    sess = tf.InteractiveSession()
    with sess.as_default():
        mixture_model_relbo.fully_corrective(mix, x)
Example #9
def construct_multivariatenormaldiag(dims, iter, name='', sample_shape=N):
    #loc = tf.get_variable(name + "_loc%d" % iter, dims)
    loc = tf.get_variable(name + "_loc%d" % iter,
                          initializer=tf.random_normal(dims))
    #scale = tf.nn.softplus(tf.get_variable(name + "_scale%d" % iter, dims))
    scale = tf.nn.softplus(
        tf.get_variable(name + "_scale%d" % iter,
                        initializer=tf.random_normal(dims)))
    mvn = MultivariateNormalDiag(loc=loc,
                                 scale_diag=scale,
                                 sample_shape=sample_shape)
    return mvn
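The softplus applied to the scale variable above keeps scale_diag strictly positive while the underlying tf.Variable ranges over all reals. A quick standalone NumPy check of softplus(x) = log(1 + exp(x)):

import numpy as np

raw = np.array([-3.0, 0.0, 3.0])
scales = np.log1p(np.exp(raw))   # softplus: always positive, approximately x for large x
assert (scales > 0).all()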
Example #10
def deserialize_target_from_file(filename):
    qt_deserialized = np.load(filename)
    mus = qt_deserialized['mus'].astype(np.float32)
    stds = qt_deserialized['stds'].astype(np.float32)
    pi = qt_deserialized['pi'].astype(np.float32)

    cat = Categorical(probs=tf.convert_to_tensor(pi[0]))
    target_comps = [
        MultivariateNormalDiag(loc=tf.convert_to_tensor(mus[i]),
                               scale_diag=tf.convert_to_tensor(stds[i]))
        for i in range(len(mus))
    ]
    return Mixture(cat=cat, components=target_comps)
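The file read here is assumed to be an .npz archive written with np.savez, as in Example #16 below, which stores pi, mus and stds for the target distribution. A minimal round-trip sketch with invented values:

import numpy as np

pi = np.array([[0.3, 0.7]], dtype=np.float32)
mus = np.array([[0.0], [1.0]], dtype=np.float32)
stds = np.array([[0.5], [0.8]], dtype=np.float32)
np.savez('target_dist.npz', pi=pi, mus=mus, stds=stds)  # mirrors the save in Example #16
loaded = np.load('target_dist.npz')
assert np.allclose(loaded['mus'], mus)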
Example #11
def deserialize_mixture_from_file(filename):
    qt_deserialized = np.load(filename)
    locs = qt_deserialized['locs'].astype(np.float32)
    scale_diags = qt_deserialized['scale_diags'].astype(np.float32)
    weights = qt_deserialized['weights'].astype(np.float32)

    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, scale_diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    q_latest = Mixture(cat=cat, components=q_comps)
    return q_latest
Example #12
 def target_dist(*args, **kwargs):
     """Build the target distribution"""
     stds = kwargs['stds']
     mus = kwargs['mus']
     pi = kwargs['pi']
     pcomps = [
         MultivariateNormalDiag(
             loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
             scale_diag=tf.convert_to_tensor(
                 stds[i], dtype=tf.float32))
         for i in range(len(mus))
     ]
     p = Mixture(
         cat=Categorical(probs=tf.convert_to_tensor(pi[0])),
         components=pcomps)
     #q = VectorLaplaceDiag(loc=mus[0], scale_diag=stds[0])
     return p
Example #13
def test_lipschitz_init(pi, mus, stds):
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        with sess.as_default():
            s = construct_normal([1], 0, 's')
            sess.run(tf.global_variables_initializer())
            logger.info('mean of s = %.3f, std = %.3f' %
                        (s.mean().eval(), s.stddev().eval()))
            # build target distribution
            pcomps = [
                MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
                for i in range(len(mus))
            ]
            p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                        components=pcomps)
            lipschitz_init_estimate = opt.adafw_linit(s, p)
            logger.info('L estimate is %.5f' % lipschitz_init_estimate)
Example #14
  def test_multivariate_normal_diag(self):
    with self.test_session() as sess:
      N, D, w_true, X_train, y_train, X, w, b, y = self._setup()

      # INFERENCE. Initialize scales at identity to verify if we
      # learned an approximately zero determinant.
      qw = MultivariateNormalDiag(
          loc=tf.Variable(tf.random_normal([D])),
          scale_diag=tf.Variable(tf.ones(D)))
      qb = MultivariateNormalDiag(
          loc=tf.Variable(tf.random_normal([1])),
          scale_diag=tf.Variable(tf.ones(1)))

      inference = ed.Laplace({w: qw, b: qb}, data={X: X_train, y: y_train})
      inference.run(n_iter=100)

      self._test(sess, qw, qb, w_true)
      self.assertAllClose(qw.covariance().eval(),
                          tf.diag(tf.diag_part(qw.covariance())).eval())
      self.assertAllClose(qb.covariance().eval(),
                          tf.diag(tf.diag_part(qb.covariance())).eval())
Example #15
def adaptive_fw(weights, locs, diags, q_t, mu_s, cov_s, s_t, p, k, l_prev,
                return_gamma=False):
    """Adaptive Frank-Wolfe algorithm.
    
    Sets step size as suggested in Algorithm 1 of
    https://arxiv.org/pdf/1806.05123.pdf

    Args:
        weights: [k], weights of the mixture components of q_t
        locs: [k x dim], means of mixture components of q_t
        diags: [k x dim], std deviations of mixture components of q_t
        q_t: current mixture iterate q_t
        mu_s: [dim], mean for LMO solution s
        cov_s: [dim], cov matrix for LMO solution s
        s_t: Current atom & LMO Solution s
        p: edward.model, target distribution p
        k: iteration number of Frank-Wolfe
        l_prev: previous Lipschitz estimate
        return_gamma: only return the value of gamma
    Returns:
        If return_gamma is True, only the computed value of gamma
        is returned. Else returns a dictionary containing gamma, 
        Lipschitz estimate, duality gap and step information
    """

    # Set $q_{t+1}$'s params
    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    new_locs.append(mu_s)
    new_diags.append(cov_s)

    d_t_norm = divergence(s_t, q_t, metric=FLAGS.distance_metric).eval()
    logger.info('distance norm is %.5f' % d_t_norm)

    N_samples = FLAGS.n_monte_carlo_samples
    # create and sample from $s_t, q_t$
    sample_q = q_t.sample([N_samples])
    sample_s = s_t.sample([N_samples])
    step_s = tf.reduce_mean(grad_kl(q_t, p, sample_s)).eval()
    step_q = tf.reduce_mean(grad_kl(q_t, p, sample_q)).eval()
    gap = step_q - step_s
    logger.info('duality gap %.5f' % gap)
    if gap < 0: logger.warning("Duality gap is negative returning 0 step")

    #gamma = 2. / (k + 2.)
    gamma = 0.
    tau = FLAGS.exp_adafw
    eta = FLAGS.damping_adafw
    # did the adaptive loop succeed or not
    step_type = "fixed"
    # NOTE: this is from v1 of the paper, new version
    # replaces multiplicative tau with divisor eta
    pow_tau = 1.0
    i, l_t = 0, l_prev
    f_t =  kl_divergence(q_t, p, allow_nan_stats=False).eval()
    debug('f(q_t) = %.5f' % (f_t))
    # return initial estimate if gap is -ve
    while gap >= 0:
        # compute $L_t$ and $\gamma_t$
        l_t = pow_tau * eta * l_prev
        gamma = min(gap / (l_t * d_t_norm), 1.0)
        d_1 = - gamma * gap
        d_2 = gamma * gamma * l_t * d_t_norm / 2.
        debug('linear d1 = %.5f, quad d2 = %.5f' % (d_1, d_2))
        quad_bound_rhs = f_t  + d_1 + d_2

        # $w_{t + 1} = [(1 - \gamma)w_t, \gamma]$
        new_weights = copy.copy(weights)
        new_weights = [(1. - gamma) * w for w in new_weights]
        new_weights.append(gamma)
        qt_new = Mixture(
            cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
            components=[
                MultivariateNormalDiag(loc=loc, scale_diag=diag)
                for loc, diag in zip(new_locs, new_diags)
            ])
        quad_bound_lhs = kl_divergence(qt_new, p, allow_nan_stats=False).eval()
        logger.info('lt = %.5f, gamma = %.3f, f_(qt_new) = %.5f, '
                    'linear extrapolated = %.5f' % (l_t, gamma, quad_bound_lhs,
                                                    quad_bound_rhs))
        if quad_bound_lhs <= quad_bound_rhs:
            step_type = "adaptive"
            break
        pow_tau *= tau
        i += 1
        #if i > FLAGS.adafw_MAXITER or gamma < MIN_GAMMA:
        if i > FLAGS.adafw_MAXITER:
            # estimate not good
            #gamma = 2. / (k + 2.)
            gamma = 0.
            l_t = l_prev
            step_type = "fixed_adaptive_MAXITER"
            break

    if return_gamma: return gamma
    return {
        'gamma': gamma,
        'l_estimate': l_t,
        'gap': gap,
        'step_type': step_type
    }
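Stripped of the Edward objects, the backtracking loop above inflates the Lipschitz estimate by eta * tau**i until the quadratic upper bound f(q_t) - gamma*gap + gamma**2 * L_t * d / 2 dominates the objective at the candidate iterate. A toy standalone NumPy sketch with an invented objective (not the KL used above):

import numpy as np

def toy_objective(gamma):                        # stand-in for KL(q_new || p)
    return (gamma - 0.3) ** 2

f_t, gap, d_norm = toy_objective(0.0), 0.6, 1.0  # invented gap and distance
l_prev, tau, eta, pow_tau = 1.0, 2.0, 0.99, 1.0
for _ in range(10):
    l_t = pow_tau * eta * l_prev                 # inflate the previous estimate
    gamma = min(gap / (l_t * d_norm), 1.0)
    rhs = f_t - gamma * gap + gamma ** 2 * l_t * d_norm / 2.0
    if toy_objective(gamma) <= rhs:              # quadratic bound holds: accept the step
        break
    pow_tau *= tau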
Example #16
def main(argv):
    del argv

    x_train, components = build_toy_dataset(N)
    n_examples, n_features = x_train.shape

    # save the target
    outdir = setup_outdir()
    np.savez(os.path.join(outdir, 'target_dist.npz'),
             pi=pi,
             mus=mus,
             stds=stds)

    weights, comps = [], []
    elbos = []
    relbo_vals = []
    times = []
    for iter in range(FLAGS.n_fw_iter):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                # build model
                xcomps = [
                    Normal(loc=tf.convert_to_tensor(mus[i]),
                           scale=tf.convert_to_tensor(stds[i]))
                    for i in range(len(mus))
                ]
                x = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                            components=xcomps,
                            sample_shape=N)

                qx = construct_normal([n_features], iter, 'qx')
                if iter > 0:
                    qtx = Mixture(
                        cat=Categorical(probs=tf.convert_to_tensor(weights)),
                        components=[
                            Normal(
                                loc=c['loc'][0],
                                #scale_diag=tf.nn.softplus(c['scale_diag'])) for c in comps], sample_shape=N)
                                scale=c['scale_diag'][0]) for c in comps
                        ],
                        sample_shape=N)
                    fw_iterates = {x: qtx}
                else:
                    fw_iterates = {}

                sess.run(tf.global_variables_initializer())

                total_time = 0
                start_inference_time = time.time()
                inference = relbo.KLqp({x: qx},
                                       fw_iterates=fw_iterates,
                                       fw_iter=iter)
                inference.run(n_iter=FLAGS.LMO_iter)
                end_inference_time = time.time()

                total_time += end_inference_time - start_inference_time

                if iter > 0:
                    relbo_vals.append(-utils.compute_relbo(
                        qx, fw_iterates[x], x, np.log(iter + 1)))

                if iter == 0:
                    gamma = 1.
                elif iter > 0 and FLAGS.fw_variant == 'fixed':
                    gamma = 2. / (iter + 2.)
                elif iter > 0 and FLAGS.fw_variant == 'line_search':
                    start_line_search_time = time.time()
                    gamma = line_search_dkl(weights, [c['loc'] for c in comps],
                                            [c['scale_diag'] for c in comps],
                                            qx.loc.eval(),
                                            qx.stddev().eval(), x, iter)
                    end_line_search_time = time.time()
                    total_time += end_line_search_time - start_line_search_time
                elif iter > 0 and FLAGS.fw_variant == 'fc':
                    gamma = 2. / (iter + 2.)

                comps.append({
                    'loc': qx.mean().eval(),
                    'scale_diag': qx.stddev().eval()
                })
                weights = utils.update_weights(weights, gamma, iter)

                print("weights", weights)
                print("comps", [c['loc'] for c in comps])
                print("scale_diags", [c['scale_diag'] for c in comps])

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps],
                    sample_shape=N)

                if FLAGS.fw_variant == "fc":
                    start_fc_time = time.time()
                    weights = fully_corrective(q_latest, x)
                    weights = list(weights)
                    for i in reversed(range(len(weights))):
                        w = weights[i]
                        if w == 0:
                            del weights[i]
                            del comps[i]
                    weights = np.array(weights)
                    end_fc_time = time.time()
                    total_time += end_fc_time - start_fc_time

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps],
                    sample_shape=N)

                elbos.append(elbo(q_latest, x))

                outdir = setup_outdir()

                print("total time", total_time)
                times.append(float(total_time))
                utils.save_times(os.path.join(outdir, 'times.csv'), times)

                elbos_filename = os.path.join(outdir, 'elbos.csv')
                logger.info("iter, %d, elbo, %.2f +/- %.2f" %
                            (iter, *elbos[-1]))
                np.savetxt(elbos_filename, elbos, delimiter=',')
                logger.info("saving elbos to, %s" % elbos_filename)

                relbos_filename = os.path.join(outdir, 'relbos.csv')
                np.savetxt(relbos_filename, relbo_vals, delimiter=',')
                logger.info("saving relbo values to, %s" % relbos_filename)

                for_serialization = {
                    'locs': np.array([c['loc'] for c in comps]),
                    'scale_diags': np.array([c['scale_diag'] for c in comps])
                }
                qt_outfile = os.path.join(outdir, 'qt_iter%d.npz' % iter)
                np.savez(qt_outfile, weights=weights, **for_serialization)
                np.savez(os.path.join(outdir, 'qt_latest.npz'),
                         weights=weights,
                         **for_serialization)
                logger.info("saving qt to, %s" % qt_outfile)
        tf.reset_default_graph()
Example #17
def run_gap(pi, mus, stds):
    weights, comps = [], []
    elbos = []
    relbo_vals = []
    for t in range(FLAGS.n_fw_iter):
        logger.info('Frank Wolfe Iteration %d' % t)
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                # target distribution components
                pcomps = [
                    MultivariateNormalDiag(
                        loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                        scale_diag=tf.convert_to_tensor(stds[i],
                                                        dtype=tf.float32))
                    for i in range(len(mus))
                ]
                # target distribution
                p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                            components=pcomps)

                # LMO approximation
                s = construct_normal([1], t, 's')
                fw_iterates = {}
                if t > 0:
                    qtx = Mixture(
                        cat=Categorical(probs=tf.convert_to_tensor(weights)),
                        components=[
                            MultivariateNormalDiag(**c) for c in comps
                        ])
                    fw_iterates = {p: qtx}
                sess.run(tf.global_variables_initializer())
                # Run inference on relbo to solve LMO problem
                # NOTE: KLqp has a side effect, it is modifying s
                inference = relbo.KLqp({p: s},
                                       fw_iterates=fw_iterates,
                                       fw_iter=t)
                inference.run(n_iter=FLAGS.LMO_iter)
                # s now contains solution to LMO

                if t > 0:
                    sample_s = s.sample([FLAGS.n_monte_carlo_samples])
                    sample_q = qtx.sample([FLAGS.n_monte_carlo_samples])
                    step_s = tf.reduce_mean(grad_kl(qtx, p, sample_s)).eval()
                    step_q = tf.reduce_mean(grad_kl(qtx, p, sample_q)).eval()
                    gap = step_q - step_s
                    logger.info('Frank-Wolfe gap at iter %d is %.5f' %
                                (t, gap))
                    if gap < 0:
                        eprint('Frank-Wolfe gap becoming negative!')
                    # f(q*) = f(p) = 0
                    logger.info('Objective value (actual gap) is %.5f' %
                                kl_divergence(qtx, p).eval())

                gamma = 2. / (t + 2.)
                comps.append({
                    'loc': s.mean().eval(),
                    'scale_diag': s.stddev().eval()
                })
                weights = coreutils.update_weights(weights, gamma, t)

        tf.reset_default_graph()
Example #18
    Halos_Pos.append(hal[3:3 + nb_components * 2].reshape(nb_components, 2))
print("Galaxy (X, Y):", len(Galaxy_Pos), Galaxy_Pos[0].shape)
print("Galaxy (E1, E2):", len(Galaxy_E), Galaxy_E[0].shape)
print("Halos (X, Y):", len(Halos_Pos), Halos_Pos[0].shape)

# ===========================================================================
# Create the model
# ===========================================================================
# latent variable z
mu = Normal(mu=tf.zeros([nb_components, nb_features]),
            sigma=tf.ones([nb_components, nb_features]))
sigma = InverseGamma(alpha=tf.ones([nb_components, nb_features]),
                     beta=tf.ones([nb_components, nb_features]))
cat = Categorical(logits=tf.zeros([nb_datapoints, nb_components]))
components = [
    MultivariateNormalDiag(mu=tf.ones([nb_datapoints, 1]) * mu[k],
                           diag_stdev=tf.ones([nb_datapoints, 1]) * sigma[k])
    for k in range(nb_components)
]
x = Mixture(cat=cat, components=components)

# ====== inference ====== #
qmu = Normal(mu=tf.Variable(tf.random_normal([nb_components, nb_features])),
             sigma=tf.nn.softplus(
                 tf.Variable(tf.zeros([nb_components, nb_features]))))
qsigma = InverseGamma(alpha=tf.nn.softplus(
    tf.Variable(tf.random_normal([nb_components, nb_features]))),
                      beta=tf.nn.softplus(
                          tf.Variable(
                              tf.random_normal([nb_components, nb_features]))))

# fitting data
Example #19
class SimpleGaussianLDA(object):
    def __init__(self, K, D, N, nu, use_param=False):
        self.K = K  # number of topics
        self.D = D  # number of documents
        self.N = N  # number of words of each document
        self.nu = nu
        self.alpha = alpha = tf.zeros([K]) + 0.1
        self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
        self.sigma = sigma = tf.sqrt(self.sigmasq)
        self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
        self.theta = theta = [None] * D
        self.z = z = [None] * D
        self.w = w = [None] * D
        for d in range(D):
            theta[d] = Dirichlet(alpha)
            if use_param:
                w[d] = ParamMixture(mixing_weights=theta[d],
                                    component_params={
                                        'loc': mu,
                                        'scale_diag': sigma
                                    },
                                    component_dist=MultivariateNormalDiag,
                                    sample_shape=N[d])
                z[d] = w[d].cat
            else:
                z[d] = Categorical(probs=theta[d], sample_shape=N[d])
                components = [
                    MultivariateNormalDiag(loc=tf.gather(mu, k),
                                           scale_diag=tf.gather(self.sigma, k),
                                           sample_shape=N[d]) for k in range(K)
                ]
                w[d] = Mixture(cat=z[d],
                               components=components,
                               sample_shape=N[d])

    def __run_inference__(self, T, S=None):
        tf.global_variables_initializer().run()
        for n in range(self.inference.n_iter):
            info_dict = self.inference.update()
            self.inference.print_progress(info_dict)
        self.inference.finalize()

    def klqp(self, docs, S, T, wordVec):
        K = self.K
        D = self.D
        nu = self.nu
        self.latent_vars = latent_vars = {}
        training_data = {}
        qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
                     scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
        latent_vars[self.mu] = qmu
        qsigmasq = InverseGamma(tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))),
                                tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
        latent_vars[self.sigmasq] = qsigmasq
        for d in range(D):
            training_data[self.w[d]] = docs[d]
        self.qmu = qmu
        self.qsigma = qsigma = tf.sqrt(qsigmasq)
        self.qw = MultivariateNormalDiag(loc=qmu, scale_diag=qsigma)
        V = len(wordVec)
        logprobs = [None] * V
        for i in range(V):
            logprobs[i] = self.qw.log_prob(wordVec[i])
        self.qbeta = tf.convert_to_tensor(logprobs)
        self.inference = ed.KLqp(latent_vars, data=training_data)
        self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
        self.__run_inference__(T)

    def getTopWords(self, wordVec, tokens):
        K = self.K
        V = len(wordVec)
        qbeta = self.qbeta
        qbeta_sample = qbeta.eval()
        prob = [None] * K
        for k in range(K):
            prob[k] = qbeta_sample[:, k]
        self.tokens_probs = tokens_probs = [None] * K
        self.top_words = [None] * K
        for k in range(K):
            tokens_probs[k] = dict((t, p) for t, p in zip(range(V), prob[k]))
            newdict = sorted(tokens_probs[k],
                             key=tokens_probs[k].get,
                             reverse=True)[:15]
            self.top_words[k] = newdict
            print('topic %d' % k)
            for Id in newdict:
                print(tokens[Id], tokens_probs[k][Id])

    def getPMI(self, comatrix):
        K = self.K
        self.pmis = pmis = [None] * K
        for k in range(K):
            pmis[k] = util.pmi(comatrix, self.top_words[k])
            print('topic %d pmi: %f' % (k, pmis[k]))
Example #20
def _test(mu, diag_stdev, n):
    x = MultivariateNormalDiag(mu=mu, diag_stdev=diag_stdev)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(mu)
    assert val_est == val_true
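The check above follows standard sample-shape semantics: n i.i.d. draws from a D-dimensional Gaussian stack into an array of shape [n, D]. A standalone NumPy sketch:

import numpy as np

mu, diag_stdev, n = np.zeros(3), np.ones(3), (7,)
samples = np.random.multivariate_normal(mu, np.diag(diag_stdev ** 2), size=n)
assert samples.shape == n + mu.shape   # (7, 3), mirroring val_est == val_true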
Example #21
 def f(gamma):
     weights = [(1 - gamma), gamma]
     q_l = Mixture(cat=Categorical(probs=tf.convert_to_tensor(weights)),
                   components=[MultivariateNormalDiag(**c) for c in comps])
     return kl_divergence(q_l, qt).eval()
Example #22
  def _build_model(self):
    """
    implementation of the KMN
    """
    with tf.variable_scope(self.name):
      self.layer_in_x, self.layer_in_y = self._build_input_layers() # add placeholders, data_normalization and data_noise if desired

      self.X_in = L.get_output(self.layer_in_x)
      self.Y_in = L.get_output(self.layer_in_y)

      # get batch size
      self.batch_size = tf.shape(self.X_ph)[0]

      # create core multi-layer perceptron
      core_network = MLP(
        name="core_network",
        input_layer=self.layer_in_x,
        output_dim=self.n_centers*self.n_scales,
        hidden_sizes=self.hidden_sizes,
        hidden_nonlinearity=self.hidden_nonlinearity,
        output_nonlinearity=None,
      )

      self.core_output_layer = core_network.output_layer

      # weights of the mixture components
      self.logits = L.get_output(self.core_output_layer)
      self.softmax_layer_weights = L.NonlinearityLayer(self.core_output_layer, nonlinearity=tf.nn.softmax)
      self.weights = L.get_output(self.softmax_layer_weights)

      # locations of the kernelfunctions
      self.locs = tf.Variable(np.zeros((self.n_centers, self.ndim_y)), name="locs", trainable=False, dtype=tf.float32) # assign sampled locs when fitting
      self.locs_layer = L.VariableLayer(core_network.input_layer, (self.n_centers, self.ndim_y), variable=self.locs, name="locs", trainable=False)

      self.locs_array = tf.unstack(tf.transpose(tf.multiply(tf.ones((self.batch_size, self.n_centers, self.ndim_y)), self.locs), perm=[1, 0, 2]))
      assert len(self.locs_array) == self.n_centers

      # scales of the gaussian kernels
      log_scales_layer = L.VariableLayer(core_network.input_layer, (self.n_scales,),
                                         variable=tf.Variable(self.init_scales_softplus, dtype=tf.float32, trainable=self.train_scales),
                                         name="log_scales", trainable=self.train_scales)

      self.scales_layer = L.NonlinearityLayer(log_scales_layer, nonlinearity=tf.nn.softplus)
      self.scales = L.get_output(self.scales_layer)
      self.scales_array = scales_array = tf.unstack(tf.transpose(tf.multiply(tf.ones((self.batch_size, self.ndim_y, self.n_scales)), self.scales), perm=[2,0,1]))
      assert len(self.scales_array) == self.n_scales

      # put mixture components together
      self.y_input = L.get_output(self.layer_in_y)
      self.cat = cat = Categorical(logits=self.logits)
      self.components = components = [MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc in self.locs_array for scale in scales_array]
      self.mixture = mixture = Mixture(cat=cat, components=components)

      # softmax entropy penalty -> regularization
      self.softmax_entropy = tf.reduce_sum(- tf.multiply(tf.log(self.weights), self.weights), axis=1)
      self.entropy_reg_coef_ph = tf.placeholder_with_default(float(self.entropy_reg_coef), name='entropy_reg_coef', shape=())
      self.softmax_entrop_loss = self.entropy_reg_coef_ph * self.softmax_entropy
      tf.losses.add_loss(self.softmax_entrop_loss, tf.GraphKeys.REGULARIZATION_LOSSES)

      # tensor to compute probabilities
      if self.data_normalization:
        self.pdf_ = mixture.prob(self.y_input) / tf.reduce_prod(self.std_y_sym)
        self.log_pdf_ = mixture.log_prob(self.y_input) - tf.reduce_sum(tf.log(self.std_y_sym))
      else:
        self.pdf_ = mixture.prob(self.y_input)
        self.log_pdf_ = mixture.log_prob(self.y_input)

      # symbolic tensors for getting the unnormalized mixture components
      if self.data_normalization:
        self.scales_unnormalized = tf.transpose(tf.multiply(tf.ones((self.ndim_y, self.n_scales)), self.scales)) * self.std_y_sym # shape = (n_scales, ndim_y)
        self.locs_unnormalized = self.locs * self.std_y_sym + self.mean_y_sym
      else:
        self.scales_unnormalized = tf.transpose(tf.multiply(tf.ones((self.ndim_y, self.n_scales)), self.scales)) # shape = (n_scales, ndim_y)
        self.locs_unnormalized = self.locs

    # initialize LayersPowered --> provides functions for serializing tf models
    LayersPowered.__init__(self, [self.core_output_layer, self.locs_layer, self.scales_layer, self.layer_in_y])
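As a rough standalone sketch (NumPy/SciPy, invented shapes) of the kernel-mixture density the model above represents: every (center, scale) pair contributes one diagonal Gaussian component, ordered centers-outer and scales-inner as in the list comprehension above, and the softmax weights from the network mix them.

import numpy as np
from scipy.stats import multivariate_normal

n_centers, n_scales, ndim_y = 3, 2, 2
locs = np.random.randn(n_centers, ndim_y)
scales = np.array([0.5, 1.5])
logits = np.random.randn(n_centers * n_scales)
weights = np.exp(logits) / np.exp(logits).sum()            # softmax mixture weights

y = np.zeros(ndim_y)
components = [(loc, s) for loc in locs for s in scales]    # centers outer, scales inner
pdf = sum(w * multivariate_normal.pdf(y, mean=loc, cov=(s ** 2) * np.eye(ndim_y))
          for w, (loc, s) in zip(weights, components))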
Example #23
def main(argv):
    del argv

    outdir = FLAGS.outdir
    if '~' in outdir: outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)

    # Files to log metrics
    times_filename = os.path.join(outdir, 'times.csv')
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    objective_filename = os.path.join(outdir, 'kl.csv')
    reference_filename = os.path.join(outdir, 'ref_kl.csv')
    step_filename = os.path.join(outdir, 'steps.csv')
    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        curvature_filename = os.path.join(outdir, 'curvature.csv')
        gap_filename = os.path.join(outdir, 'gap.csv')
        iter_info_filename = os.path.join(outdir, 'iter_info.txt')
    elif FLAGS.fw_variant == 'line_search':
        goutdir = os.path.join(outdir, 'gradients')

    # empty the files present in the folder already
    open(times_filename, 'w').close()
    open(elbos_filename, 'w').close()
    open(objective_filename, 'w').close()
    open(reference_filename, 'w').close()
    open(step_filename, 'w').close()
    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        open(curvature_filename, 'w').close()
        append_to_file(curvature_filename, "c_local,c_global")
        open(gap_filename, 'w').close()
        open(iter_info_filename, 'w').close()
    elif FLAGS.fw_variant == 'line_search':
        os.makedirs(goutdir, exist_ok=True)

    for i in range(FLAGS.n_fw_iter):
        # NOTE: First iteration (t = 0) is initialization
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                p, mus, stds = create_target_dist()

                # current iterate (solution until now)
                if FLAGS.init == 'random':
                    muq = np.random.randn(D).astype(np.float32)
                    stdq = softplus(np.random.randn(D).astype(np.float32))
                    raise ValueError
                else:
                    muq = mus[0]
                    stdq = stds[0]

                # 1 correct LMO
                t = 1
                comps = [{'loc': muq, 'scale_diag': stdq}]
                weights = [1.0]
                curvature_estimate = opt.adafw_linit()

                qtx = MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(muq, dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stdq, dtype=tf.float32))
                fw_iterates = {p: qtx}

                # calculate kl-div with 1 component
                objective_old = kl_divergence(qtx, p).eval()
                logger.info("kl with init %.4f" % (objective_old))
                append_to_file(reference_filename, objective_old)

                # s is the solution to LMO. It is initialized randomly
                # mu ~ N(0, 1), std ~ softplus(N(0, 1))
                s = coreutils.construct_multivariatenormaldiag([D], t, 's')

                sess.run(tf.global_variables_initializer())

                total_time = 0
                start_inference_time = time.time()
                if FLAGS.LMO == 'vi':
                    # we have to iterate over parameter space
                    raise ValueError
                    inference = relbo.KLqp({p: s},
                                           fw_iterates=fw_iterates,
                                           fw_iter=t)
                    inference.run(n_iter=FLAGS.LMO_iter)
                # s now contains solution to LMO
                end_inference_time = time.time()

                mu_s = s.mean().eval()
                cov_s = s.stddev().eval()

                # NOTE: keep only step size time
                #total_time += end_inference_time - start_inference_time

                # compute step size to update the next iterate
                step_result = {}
                if FLAGS.fw_variant == 'fixed':
                    gamma = 2. / (t + 2.)
                elif FLAGS.fw_variant == 'line_search':
                    start_line_search_time = time.time()
                    step_result = opt.line_search_dkl(
                        weights, [c['loc'] for c in comps],
                        [c['scale_diag']
                         for c in comps], qtx, mu_s, cov_s, s, p, t)
                    end_line_search_time = time.time()
                    total_time += (end_line_search_time -
                                   start_line_search_time)
                    gamma = step_result['gamma']
                elif FLAGS.fw_variant == 'adafw':
                    start_adafw_time = time.time()
                    step_result = opt.adaptive_fw(
                        weights, [c['loc'] for c in comps],
                        [c['scale_diag'] for c in comps], qtx, mu_s, cov_s, s,
                        p, t, curvature_estimate)
                    end_adafw_time = time.time()
                    total_time += end_adafw_time - start_adafw_time
                    gamma = step_result['gamma']
                else:
                    raise NotImplementedError

                comps.append({'loc': mu_s, 'scale_diag': cov_s})
                weights = [(1. - gamma), gamma]

                c_global = estimate_global_curvature(comps, qtx)

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps])

                # Log metrics for current iteration
                time_t = float(total_time)
                logger.info('total time %f' % (time_t))
                append_to_file(times_filename, time_t)

                elbo_t = elbo(q_latest, p, n_samples=1000)
                logger.info("iter, %d, elbo, %.2f +/- %.2f" %
                            (t, elbo_t[0], elbo_t[1]))
                append_to_file(elbos_filename,
                               "%f,%f" % (elbo_t[0], elbo_t[1]))

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                objective_t = kl_divergence(q_latest, p).eval()
                logger.info("run %d, kl %.4f" % (i, objective_t))
                append_to_file(objective_filename, objective_t)

                if FLAGS.fw_variant.startswith('ada'):
                    curvature_estimate = step_result['c_estimate']
                    append_to_file(gap_filename, step_result['gap'])
                    append_to_file(iter_info_filename,
                                   step_result['step_type'])
                    logger.info('gap = %.3f, ct = %.5f, iter_type = %s' %
                                (step_result['gap'], step_result['c_estimate'],
                                 step_result['step_type']))
                    append_to_file(curvature_filename,
                                   '%f,%f' % (curvature_estimate, c_global))
                elif FLAGS.fw_variant == 'line_search':
                    n_line_search_samples = step_result['n_samples']
                    grad_t = step_result['grad_gamma']
                    g_outfile = os.path.join(
                        goutdir, 'line_search_samples_%d.npy.%d' %
                        (n_line_search_samples, t))
                    logger.info('saving line search data to, %s' % g_outfile)
                    np.save(open(g_outfile, 'wb'), grad_t)

            sess.close()

        tf.reset_default_graph()
Example #24
 def test_multivariate_real(self):
     with self.test_session():
         x = MultivariateNormalDiag(tf.zeros(2), tf.ones(2))
         y = ed.transform(x)
         sample = y.sample(10, seed=1).eval()
         self.assertSamplePosNeg(sample)
Example #25
def adaptive_afw(weights, comps, locs, diags, q_t, mu_s, cov_s, s_t, p,
                 k, l_prev):
    """
        Away steps variant
    Args:
        same as fixed
    """
    d_t_norm = divergence(s_t, q_t, metric=FLAGS.distance_metric).eval()
    logger.info('distance norm is %.5f' % d_t_norm)

    # Find v_t
    qcomps = q_t.components
    index_v_t, step_v_t = argmax_grad_dotp(p, q_t, qcomps,
                                           FLAGS.n_monte_carlo_samples)
    v_t = qcomps[index_v_t]

    # Frank-Wolfe gap
    sample_q = q_t.sample([FLAGS.n_monte_carlo_samples])
    sample_s = s_t.sample([FLAGS.n_monte_carlo_samples])
    step_s = tf.reduce_mean(grad_kl(q_t, p, sample_s)).eval()
    step_q = tf.reduce_mean(grad_kl(q_t, p, sample_q)).eval()
    gap_fw = step_q - step_s
    if gap_fw < 0: logger.warning("Frank-Wolfe duality gap is negative")
    # Away gap
    gap_a = step_v_t - step_q
    if gap_a < 0: eprint('Away gap < 0!!!')
    logger.info('fw gap %.5f, away gap %.5f' % (gap_fw, gap_a))

    # Set $q_{t+1}$'s params
    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    if (gap_fw >= gap_a) or (len(comps) == 1):
        # FW direction, proceeds exactly as adafw
        logger.info('Proceeding in FW direction ')
        adaptive_step_type = 'fw'
        gap = gap_fw
        new_locs.append(mu_s)
        new_diags.append(cov_s)
        gamma_max = 1.0
    else:
        # Away direction
        logger.info('Proceeding in Away direction ')
        adaptive_step_type = 'away'
        gap = gap_a
        if weights[index_v_t] < 1.0:
            gamma_max = weights[index_v_t] / (1.0 - weights[index_v_t])
        else:
            gamma_max = 100. # Large value when t = 1

    def default_fixed_step(fail_type='fixed'):
        # adaptive failed, return to fixed
        gamma = 2. / (k + 2.)
        new_comps = copy.copy(comps)
        new_comps.append({'loc': mu_s, 'scale_diag': cov_s})
        new_weights = [(1. - gamma) * w for w in weights]
        new_weights.append(gamma)
        return {
            'gamma': 2. / (k + 2.),
            'l_estimate': l_prev,
            'weights': new_weights,
            'comps': new_comps,
            'gap': gap,
            'step_type': fail_type
        }
    
    if gap <= 0:
        return default_fixed_step()

    tau = FLAGS.exp_adafw
    eta = FLAGS.damping_adafw
    pow_tau = 1.0
    i, l_t = 0, l_prev
    f_t =  kl_divergence(q_t, p, allow_nan_stats=False).eval()
    debug('f(q_t) = %.5f' % (f_t))
    gamma = 2. / (k + 2)
    is_drop_step = False
    while gamma >= MIN_GAMMA and i < FLAGS.adafw_MAXITER:
        # compute $L_t$ and $\gamma_t$
        l_t = pow_tau * eta * l_prev
        # NOTE: Handle extreme values of gamma carefully
        gamma = min(gap / (l_t * d_t_norm), gamma_max)

        d_1 = - gamma * gap
        d_2 = gamma * gamma * l_t * d_t_norm / 2.
        debug('linear d1 = %.5f, quad d2 = %.5f' % (d_1, d_2))
        quad_bound_rhs = f_t  + d_1 + d_2

        # construct $q_{t + 1}$
        if adaptive_step_type == 'fw':
            if gamma == gamma_max:
                # gamma = 1.0, q_{t + 1} = s_t
                new_comps = [{'loc': mu_s, 'scale_diag': cov_s}]
                new_weights = [1.]
                qt_new = MultivariateNormalDiag(loc=mu_s, scale_diag=cov_s)
            else:
                new_comps = copy.copy(comps)
                new_comps.append({'loc': mu_s, 'scale_diag': cov_s})
                new_weights = copy.copy(weights)
                new_weights = [(1. - gamma) * w for w in new_weights]
                new_weights.append(gamma)
                qt_new = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
                    components=[
                        MultivariateNormalDiag(loc=loc, scale_diag=diag)
                        for loc, diag in zip(new_locs, new_diags)
                    ])
        elif adaptive_step_type == 'away':
            new_weights = copy.copy(weights)
            new_comps = copy.copy(comps)
            if gamma == gamma_max:
                # drop v_t
                is_drop_step = True
                logger.info('...drop step')
                del new_weights[index_v_t]
                new_weights = [(1. + gamma) * w for w in new_weights]
                del new_comps[index_v_t]
                # NOTE: recompute locs and diags after dropping v_t
                drop_locs = [c['loc'] for c in new_comps]
                drop_diags = [c['scale_diag'] for c in new_comps]
                qt_new = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
                    components=[
                        MultivariateNormalDiag(loc=loc, scale_diag=diag)
                        for loc, diag in zip(drop_locs, drop_diags)
                    ])
            else:
                is_drop_step = False
                new_weights = [(1. + gamma) * w for w in new_weights]
                new_weights[index_v_t] -= gamma
                qt_new = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
                    components=[
                        MultivariateNormalDiag(loc=loc, scale_diag=diag)
                        for loc, diag in zip(new_locs, new_diags)
                    ])

        quad_bound_lhs = kl_divergence(qt_new, p, allow_nan_stats=False).eval()
        logger.info('lt = %.5f, gamma = %.3f, f_(qt_new) = %.5f, '
                    'linear extrapolated = %.5f' % (l_t, gamma, quad_bound_lhs,
                                                    quad_bound_rhs))
        if quad_bound_lhs <= quad_bound_rhs:
            step_type = "adaptive"
            if adaptive_step_type == "away": step_type = "away"
            if is_drop_step: step_type = "drop"
            return {
                'gamma': gamma,
                'l_estimate': l_t,
                'weights': new_weights,
                'comps': new_comps,
                'gap': gap,
                'step_type': step_type
            }
        pow_tau *= tau
        i += 1

    # adaptive loop failed, return fixed step size
    logger.warning("gamma below threshold value, returning fixed step")
    return default_fixed_step()
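A quick NumPy check (invented weights) of the away-step bookkeeping above: scaling every weight by (1 + gamma) and subtracting gamma from the away atom keeps the weights summing to one, and gamma_max = w_v / (1 - w_v) is exactly the step that drives that atom's weight to zero (the drop step).

import numpy as np

weights = np.array([0.5, 0.3, 0.2])
v = 2                                        # index of the away atom v_t
gamma = 0.1
new_weights = (1. + gamma) * weights
new_weights[v] -= gamma
assert np.isclose(new_weights.sum(), 1.0)

gamma_max = weights[v] / (1. - weights[v])   # step size that drops atom v entirely
dropped = (1. + gamma_max) * weights
dropped[v] -= gamma_max
assert np.isclose(dropped[v], 0.0)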
Example #26
def test_exact_gamma():
    pi = mixture_model_relbo.pi
    mus = mixture_model_relbo.mus
    stds = mixture_model_relbo.stds
    outfile = os.path.join(FLAGS.outdir, 'gamma.csv')
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        with sess.as_default():
            # Build p = pi[0] * N(mu[0], std[0]) + pi[1] * N(mu[1], std[1])
            # thus, gamma = pi[1] (=0.6), q_t = N(mu[0], std[0])
            # s = N(mu[1], std[1])
            pcomps = [
                MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
                for i in range(len(mus))
            ]
            p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi[0])),
                        components=pcomps)
            # build q_t
            weights = [1.]
            locs = [mus[0]]
            diags = [stds[0]]
            # Create current iter $q_t$
            qt = Mixture(cat=Categorical(probs=tf.convert_to_tensor(weights)),
                         components=[
                             MultivariateNormalDiag(loc=loc, scale_diag=diag)
                             for loc, diag in zip(locs, diags)
                         ])
            s = MultivariateNormalDiag(loc=mus[1], scale_diag=stds[1])
            if FLAGS.fw_variant == "line_search":
                gamma = opt.line_search_dkl(weights,
                                            locs,
                                            diags,
                                            qt,
                                            mus[1],
                                            stds[1],
                                            s,
                                            p,
                                            FLAGS.init_k,
                                            return_gamma=True)
                # seed, n_line_search_iter, n_monte_carlo_samples, b, gamma
                append_to_file(
                    outfile,
                    "%d,%d,%d,%d,%f" % (FLAGS.seed, FLAGS.n_line_search_iter,
                                        FLAGS.n_monte_carlo_samples, 1, gamma))
            elif FLAGS.fw_variant == "adafw":
                gamma = opt.adaptive_fw(weights=weights,
                                        locs=locs,
                                        diags=diags,
                                        q_t=qt,
                                        mu_s=mus[1],
                                        cov_s=stds[1],
                                        s_t=s,
                                        p=p,
                                        k=FLAGS.init_k,
                                        l_prev=1.,
                                        return_gamma=True)
                # seed, n_monte_carlo_samples, eta, tau, linit, gamma
                append_to_file(
                    outfile, "%d,%d,%f,%f,%f,%f" %
                    (FLAGS.seed, FLAGS.n_monte_carlo_samples,
                     FLAGS.damping_adafw, FLAGS.exp_adafw, FLAGS.linit_fixed,
                     gamma))
            else:
                raise NotImplementedError('other variants not tested yet.')
    print_err(pi[0][1], gamma)
Example #27
D = 2  # dimensionality of data
ed.set_seed(42)

# DATA
x_train = build_toy_dataset(N)
plt.scatter(x_train[:, 0], x_train[:, 1])
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

# MODEL
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
cat = Categorical(logits=tf.zeros([N, K]))
components = [
    MultivariateNormalDiag(mu=tf.ones([N, 1]) * mu[k],
                           diag_stdev=tf.ones([N, 1]) * sigma[k])
    for k in range(K)
]
x = Mixture(cat=cat, components=components)

# INFERENCE
qmu = Normal(mu=tf.Variable(tf.random_normal([K, D])),
             sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
qsigma = InverseGamma(alpha=tf.nn.softplus(
    tf.Variable(tf.random_normal([K, D]))),
                      beta=tf.nn.softplus(tf.Variable(tf.random_normal([K,
                                                                        D]))))

inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train})
inference.initialize(n_samples=20, n_iter=4000)
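The snippet stops after initialize; following the pattern of __run_inference__ in Example #19, the remaining training loop would plausibly look like the sketch below (same Edward API, not verified here):

tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
inference.finalize()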
Example #28
def test_adaptive_gamma():
    pi = np.array([0.2, 0.5, 0.3]).astype(np.float32)
    mus = [[2.], [-1.], [0.]]
    stds = [[.6], [.4], [0.5]]
    outfile = os.path.join(FLAGS.outdir, 'gamma.csv')
    g = tf.Graph()
    with g.as_default():
        sess = tf.InteractiveSession()
        with sess.as_default():
            # p = pi[0] * N(mus[0], stds[0]) + ... + pi[2] * N(mus[2], stds[2])
            p = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(pi)),
                components=[
                    #Normal(loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    #    scale=tf.convert_to_tensor(
                    #        stds[i], dtype=tf.float32)),
                    MultivariateNormalDiag(
                        loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                        scale_diag=tf.convert_to_tensor(stds[i],
                                                        dtype=tf.float32))
                    for i in range(len(mus))
                ])
            qt = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi[:2])),
                         components=[
                             MultivariateNormalDiag(
                                 loc=tf.convert_to_tensor(mus[i],
                                                          dtype=tf.float32),
                                 scale_diag=tf.convert_to_tensor(
                                     stds[i], dtype=tf.float32))
                             for i in range(len(mus[:2]))
                         ])
            st = MultivariateNormalDiag(
                loc=tf.convert_to_tensor(mus[2], dtype=tf.float32),
                scale_diag=tf.convert_to_tensor(stds[2], dtype=tf.float32))

            if FLAGS.fw_variant == "line_search":
                gamma = opt.line_search_dkl(pi[:2],
                                            mus[:2],
                                            stds[:2],
                                            qt,
                                            mus[2],
                                            stds[2],
                                            st,
                                            p,
                                            FLAGS.init_k,
                                            return_gamma=True)
                # seed, n_line_search_iter, n_monte_carlo_samples, b, gamma
                append_to_file(
                    outfile,
                    "%d,%d,%d,%d,%f" % (FLAGS.seed, FLAGS.n_line_search_iter,
                                        FLAGS.n_monte_carlo_samples, 1, gamma))
            elif FLAGS.fw_variant == "adafw":
                gamma = opt.adaptive_fw(weights=pi[:2],
                                        locs=mus[:2],
                                        diags=stds[:2],
                                        q_t=qt,
                                        mu_s=mus[2],
                                        cov_s=stds[2],
                                        s_t=st,
                                        p=p,
                                        k=FLAGS.init_k,
                                        l_prev=opt.adafw_linit(qt, p),
                                        return_gamma=True)
                # seed, n_monte_carlo_samples, eta, tau, linit, gamma
                append_to_file(
                    outfile, "%d,%d,%f,%f,%f,%f" %
                    (FLAGS.seed, FLAGS.n_monte_carlo_samples,
                     FLAGS.damping_adafw, FLAGS.exp_adafw, FLAGS.linit_fixed,
                     gamma))
    print_err(pi[2], gamma)
Example #29
def line_search_dkl(weights, locs, diags, q_t, mu_s, cov_s, s_t, p, k,
                    return_gamma=False):
    """Performs line search for the best step size gamma.
    
    Uses projected gradient descent to find the gamma that minimizes
    KL(q_t + gamma (s - q_t) || p)
    
    Args:
        weights: [k], weights of mixture components of q_t
        locs: [k x dim], means of mixture components of q_t
        diags: [k x dim], deviations of mixture components of q_t
        q_t: current mixture iterate q_t
        mu_s: [dim], mean for LMO Solution s
        cov_s: [dim], cov matrix for LMO solution s
        s_t: Current atom & LMO Solution s
        p: edward.model, target distribution p
        k: iteration number of Frank-Wolfe
        return_gamma: only return the value of gamma
    Returns:
        If return_gamma is True, only the computed value of
        gamma is returned. Else along with gradient data
        is returned in a dict
    """
    N_samples = FLAGS.n_monte_carlo_samples
    # sample from $q_t$ and s
    sample_q = q_t.sample([N_samples])
    sample_s = s_t.sample([N_samples])
    # set $q_{t+1}$'s parameters
    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    new_locs.append(mu_s)
    new_diags.append(cov_s)
    # initialize $\gamma$
    gamma = 2. / (k + 2.)
    n_steps = FLAGS.n_line_search_iter
    prog_bar = ed.util.Progbar(n_steps)
    # storing gradients for analysis
    grad_gamma = []
    for it in range(n_steps):
        print("line_search iter %d, %.5f" % (it, gamma))
        new_weights = copy.copy(weights)
        new_weights = [(1. - gamma) * w for w in new_weights]
        new_weights.append(gamma)
        qt_new = Mixture(
            cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
            components=[
                MultivariateNormalDiag(loc=loc, scale_diag=diag)
                for loc, diag in zip(new_locs, new_diags)
            ])
        rez_s = grad_kl(qt_new, p, sample_s).eval()
        rez_q = grad_kl(qt_new, p, sample_q).eval()
        grad_gamma.append({'E_s': rez_s, 'E_q': rez_q, 'gamma': gamma})
        # Gradient descent step size decreasing as $\frac{1}{it + 1}$
        gamma_prime = gamma - 0.1 * (np.mean(rez_s) - np.mean(rez_q)) / (it + 1.)
        # Projecting it back to [0, 1]
        if gamma_prime >= 1 or gamma_prime <= 0:
            gamma_prime = max(min(gamma_prime, 1.), 0.)

        if np.abs(gamma - gamma_prime) < 1e-6:
            gamma = gamma_prime
            break

        gamma = gamma_prime

    if return_gamma: return gamma
    return {'gamma': gamma, 'n_samples': N_samples, 'grad_gamma': grad_gamma}
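
grad_kl is used above but not defined in this snippet. Given how its Monte Carlo means are combined (the mean over samples from s minus the mean over samples from q_t estimates d KL / d gamma), a plausible sketch of it is the pointwise log-density ratio; the original code may define it differently:

def grad_kl(q, p, samples):
    # Pointwise integrand log q(x) - log p(x); averaging it over samples of s
    # and over samples of q_t and taking the difference estimates the
    # derivative of KL((1 - gamma) q_t + gamma s || p) with respect to gamma.
    return q.log_prob(samples) - p.log_prob(samples)
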
Ejemplo n.º 30
0
import edward as ed
import matplotlib.pyplot as plt
import tensorflow as tf
from edward.models import (Categorical, InverseGamma, Mixture,
                           MultivariateNormalDiag, Normal)

# N (number of data points), K (number of mixture components) and
# build_toy_dataset are assumed to be defined earlier in the full script.
D = 2  # dimensionality of data
ed.set_seed(42)

# DATA
x_train = build_toy_dataset(N)
plt.scatter(x_train[:, 0], x_train[:, 1])
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

# MODEL
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
cat = Categorical(logits=tf.zeros([N, K]))
components = [
    MultivariateNormalDiag(mu=tf.ones([N, 1]) * tf.gather(mu, k),
                           diag_stdev=tf.ones([N, 1]) * tf.gather(sigma, k))
    for k in range(K)
]
x = Mixture(cat=cat, components=components)

# INFERENCE
qmu = Normal(mu=tf.Variable(tf.random_normal([K, D])),
             sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
qsigma = InverseGamma(alpha=tf.nn.softplus(
    tf.Variable(tf.random_normal([K, D]))),
                      beta=tf.nn.softplus(tf.Variable(tf.random_normal([K,
                                                                        D]))))

inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train})
inference.initialize(n_samples=20, n_iter=4000)
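
The snippet stops right after initialize. A typical Edward training loop to actually run the initialized inference (standard Edward API, not shown in the original snippet) looks like this:

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
inference.finalize()
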
def adaptive_pfw(weights, comps, locs, diags, q_t, mu_s, cov_s, s_t, p,
                 k, l_prev):
    """
        Adaptive pairwise variant.
    Args:
        same as fixed
    """
    d_t_norm = divergence(s_t, q_t, metric=FLAGS.distance_metric).eval()
    logger.info('distance norm is %.5f' % d_t_norm)

    # Find v_t
    qcomps = q_t.components
    index_v_t, step_v_t = argmax_grad_dotp(p, q_t, qcomps,
                                           FLAGS.n_monte_carlo_samples)
    v_t = qcomps[index_v_t]

    # Pairwise gap
    sample_s = s_t.sample([FLAGS.n_monte_carlo_samples])
    step_s = tf.reduce_mean(grad_kl(q_t, p, sample_s)).eval()
    gap_pw = step_v_t - step_s
    if gap_pw < 0: eprint("Pairwise gap is negative")

    def default_fixed_step(fail_type='fixed'):
        # adaptive failed, return to fixed
        gamma = 2. / (k + 2.)
        new_comps = copy.copy(comps)
        new_comps.append({'loc': mu_s, 'scale_diag': cov_s})
        new_weights = [(1. - gamma) * w for w in weights]
        new_weights.append(gamma)
        return {
            'gamma': 2. / (k + 2.),
            'l_estimate': l_prev,
            'weights': new_weights,
            'comps': new_comps,
            'gap': gap_pw,
            'step_type': fail_type
        }

    logger.info('Pairwise gap %.5f' % gap_pw)

    # Set $q_{t+1}$'s params
    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    new_locs.append(mu_s)
    new_diags.append(cov_s)
    gap = gap_pw
    if gap <= 0:
        return default_fixed_step()
    gamma_max = weights[index_v_t]
    step_type = 'adaptive'

    tau = FLAGS.exp_adafw
    eta = FLAGS.damping_adafw
    pow_tau = 1.0
    i, l_t = 0, l_prev
    f_t = kl_divergence(q_t, p, allow_nan_stats=False).eval()
    drop_step = False
    debug('f(q_t) = %.5f' % (f_t))
    gamma = 2. / (k + 2)
    while gamma >= MIN_GAMMA and i < FLAGS.adafw_MAXITER:
        # compute $L_t$ and $\gamma_t$
        l_t = pow_tau * eta * l_prev
        gamma = min(gap / (l_t * d_t_norm), gamma_max)

        d_1 = -gamma * gap
        d_2 = gamma * gamma * l_t * d_t_norm / 2.
        debug('linear d1 = %.5f, quad d2 = %.5f' % (d_1, d_2))
        quad_bound_rhs = f_t + d_1 + d_2

        # construct $q_{t + 1}$
        new_weights = copy.copy(weights)
        new_weights.append(gamma)
        if gamma == gamma_max:
            # hardcoding to 0 for precision issues
            new_weights[index_v_t] = 0
            drop_step = True
        else:
            new_weights[index_v_t] -= gamma
            drop_step = False

        qt_new = Mixture(
            cat=Categorical(probs=tf.convert_to_tensor(new_weights)),
            components=[
                MultivariateNormalDiag(loc=loc, scale_diag=diag)
                for loc, diag in zip(new_locs, new_diags)
            ])

        quad_bound_lhs = kl_divergence(qt_new, p, allow_nan_stats=False).eval()
        logger.info('lt = %.5f, gamma = %.3f, f_(qt_new) = %.5f, '
                    'linear extrapolated = %.5f' % (l_t, gamma, quad_bound_lhs,
                                                    quad_bound_rhs))
        if quad_bound_lhs <= quad_bound_rhs:
            new_comps = copy.copy(comps)
            new_comps.append({'loc': mu_s, 'scale_diag': cov_s})
            if drop_step:
                del new_comps[index_v_t]
                del new_weights[index_v_t]
                logger.info("...drop step")
                step_type = 'drop'
            return {
                'gamma': gamma,
                'l_estimate': l_t,
                'weights': new_weights,
                'comps': new_comps,
                'gap': gap,
                'step_type': step_type
            }
        pow_tau *= tau
        i += 1
    
    # gamma below MIN_GAMMA
    logger.warning("gamma below threshold value, returning fixed step")
    return default_fixed_step("fixed_adaptive_MAXITER")
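
Stripped of the Edward objects, the backtracking rule in adaptive_pfw reduces to growing the curvature estimate l_t until the quadratic upper bound majorizes the objective at the candidate step. A minimal sketch, where f_new_at is a hypothetical callable returning KL(q_{t+1} || p) at a candidate gamma and eta/tau play the roles of FLAGS.damping_adafw/FLAGS.exp_adafw (the defaults below are only illustrative):

def backtrack_gamma(f_qt, f_new_at, gap, d_t_norm, l_prev, gamma_max,
                    eta=0.99, tau=2.0, max_iter=10, min_gamma=1e-5):
    # Grow l_t geometrically until
    #   f_new_at(gamma) <= f_qt - gamma * gap + 0.5 * gamma^2 * l_t * d_t_norm
    pow_tau = 1.0
    for _ in range(max_iter):
        l_t = pow_tau * eta * l_prev
        gamma = min(gap / (l_t * d_t_norm), gamma_max)
        if gamma < min_gamma:
            break
        rhs = f_qt - gamma * gap + 0.5 * gamma * gamma * l_t * d_t_norm
        if f_new_at(gamma) <= rhs:
            return gamma, l_t      # bound holds: accept this step size
        pow_tau *= tau             # otherwise increase the curvature estimate
    return None, l_prev            # caller falls back to the fixed 2/(k+2) step
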