Example #1
def beta_log_prob(self, val):
  conc0 = self.parameters['concentration0']
  conc1 = self.parameters['concentration1']
  result = (conc1 - 1.0) * tf.log(val)
  result += (conc0 - 1.0) * tf.log(1.0 - val)
  result += -tf.lgamma(conc1) - tf.lgamma(conc0) + tf.lgamma(conc1 + conc0)
  return result
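For reference, this assembles the Beta log-density with the log-Beta normalizer expanded through lgamma (concentration1 plays the role of \alpha, concentration0 of \beta):

\log p(x) = (\alpha - 1)\log x + (\beta - 1)\log(1 - x) - \log\Gamma(\alpha) - \log\Gamma(\beta) + \log\Gamma(\alpha + \beta)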
Example #2
    def logpmf(self, x, n, p):
        """Log of the probability mass function.

        Parameters
        ----------
        x : tf.Tensor
            A n-D tensor for n > 1, where the inner (right-most)
            dimension represents the multivariate dimension. Each
            element is the number of outcomes in a bucket and not a
            one-hot.
        n : tf.Tensor
            A tensor of one less dimension than ``x``,
            representing the number of outcomes, equal to sum x[i]
            along the inner (right-most) dimension.
        p : tf.Tensor
            A tensor of one less dimension than ``x``, representing
            probabilities which sum to 1.

        Returns
        -------
        tf.Tensor
            A tensor of one dimension less than the input.
        """
        x = tf.cast(x, dtype=tf.float32)
        n = tf.cast(n, dtype=tf.float32)
        p = tf.cast(p, dtype=tf.float32)
        multivariate_idx = len(get_dims(x)) - 1
        if multivariate_idx == 0:
            return tf.lgamma(n + 1.0) - \
                   tf.reduce_sum(tf.lgamma(x + 1.0)) + \
                   tf.reduce_sum(x * tf.log(p))
        else:
            return tf.lgamma(n + 1.0) - \
                   tf.reduce_sum(tf.lgamma(x + 1.0), multivariate_idx) + \
                   tf.reduce_sum(x * tf.log(p), multivariate_idx)
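Both branches evaluate the same multinomial log-mass, fully reduced in the scalar case and reduced along the trailing (multivariate) axis otherwise:

\log p(x \mid n, p) = \log\Gamma(n + 1) - \sum_i \log\Gamma(x_i + 1) + \sum_i x_i \log p_i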
Example #3
    def logpdf(self, x, df, loc=0, scale=1):
        """Log of the probability density function.

        Parameters
        ----------
        x : tf.Tensor
            A n-D tensor.
        df : tf.Tensor
            A tensor of same shape as ``x``, and with all elements
            constrained to :math:`df > 0`.
        loc : tf.Tensor
            A tensor of same shape as ``x``.
        scale : tf.Tensor
            A tensor of same shape as ``x``, and with all elements
            constrained to :math:`scale > 0`.

        Returns
        -------
        tf.Tensor
            A tensor of same shape as input.
        """
        x = tf.cast(x, dtype=tf.float32)
        df = tf.cast(df, dtype=tf.float32)
        loc = tf.cast(loc, dtype=tf.float32)
        scale = tf.cast(scale, dtype=tf.float32)
        z = (x - loc) / scale
        return tf.lgamma(0.5 * (df + 1.0)) - tf.lgamma(0.5 * df) - \
               0.5 * (tf.log(np.pi) + tf.log(df)) - tf.log(scale) - \
               0.5 * (df + 1.0) * tf.log(1.0 + (1.0/df) * tf.square(z))
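With z = (x - loc) / scale and \nu = df, the return value is the Student-t log-density:

\log p(x) = \log\Gamma\left(\frac{\nu + 1}{2}\right) - \log\Gamma\left(\frac{\nu}{2}\right) - \frac{1}{2}\log(\pi\nu) - \log(\mathrm{scale}) - \frac{\nu + 1}{2}\log\left(1 + \frac{z^2}{\nu}\right)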
Example #4
File: densities.py  Project: blutooth/dgp
def beta(alpha, beta, y):
    # need to clip y, since log of 0 is nan...
    y = tf.clip_by_value(y, 1e-6, 1-1e-6)
    return (alpha - 1.) * tf.log(y) + (beta - 1.) * tf.log(1. - y) \
        + tf.lgamma(alpha + beta)\
        - tf.lgamma(alpha)\
        - tf.lgamma(beta)
Example #5
def beta(x, alpha, beta):
    # need to clip x, since log of 0 is nan...
    x = tf.clip_by_value(x, 1e-6, 1-1e-6)
    return (alpha - 1.) * tf.log(x) + (beta - 1.) * tf.log(1. - x) \
        + tf.lgamma(alpha + beta)\
        - tf.lgamma(alpha)\
        - tf.lgamma(beta)
Example #6
def multinomial_log_prob(self, val):
  n = self.parameters['total_count']
  probs = self.parameters['probs']
  f_n = tf.cast(n, tf.float32)
  f_val = tf.cast(val, tf.float32)
  result = tf.reduce_sum(tf.log(probs) * f_val, -1)
  result += tf.lgamma(f_n + 1) - tf.reduce_sum(tf.lgamma(f_val + 1), -1)
  return result
Example #7
File: densities.py  Project: blutooth/dgp
def student_t(x, mean, scale, deg_free):
    const = tf.lgamma(tf.cast((deg_free + 1.) * 0.5, tf.float64))\
        - tf.lgamma(tf.cast(deg_free * 0.5, tf.float64))\
        - 0.5*(tf.log(tf.square(scale)) + tf.cast(tf.log(deg_free), tf.float64)
               + np.log(np.pi))
    const = tf.cast(const, tf.float64)
    return const - 0.5*(deg_free + 1.) * \
        tf.log(1. + (1. / deg_free) * (tf.square((x - mean) / scale)))
Example #8
def binomial_log_prob(self, val):
  n = self.parameters['total_count']
  probs = self.parameters['probs']
  f_n = tf.cast(n, tf.float32)
  f_val = tf.cast(val, tf.float32)
  result = f_val * tf.log(probs) + (f_n - f_val) * tf.log(1.0 - probs)
  result += tf.lgamma(f_n + 1) - tf.lgamma(f_val + 1) - \
      tf.lgamma(f_n - f_val + 1)
  return result
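The lgamma terms on the last two lines are the log binomial coefficient \log\binom{n}{x}. A minimal standalone sketch of that recurring trick, assuming the TF 1.x API used throughout these examples (tf.lgamma and tf.Session at the top level); log_binomial_coefficient is just an illustrative helper name:

import tensorflow as tf  # assumes TF 1.x

def log_binomial_coefficient(n, k):
    # log C(n, k) = lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)
    n = tf.cast(n, tf.float32)
    k = tf.cast(k, tf.float32)
    return tf.lgamma(n + 1.0) - tf.lgamma(k + 1.0) - tf.lgamma(n - k + 1.0)

with tf.Session() as sess:
    print(sess.run(tf.exp(log_binomial_coefficient(5, 2))))  # prints ~10.0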
Example #9
def log_nb_positive(x, mu, theta, eps=1e-8):
    """
    Log likelihood (scalar) of a minibatch according to a negative binomial (NB) model.
    
    Variables:
    mu: mean of the negative binomial (has to be positive support) (shape: minibatch x genes)
    theta: inverse dispersion parameter (has to be positive support) (shape: minibatch x genes)
    eps: numerical stability constant
    """    
    res = tf.lgamma(x + theta) - tf.lgamma(theta) - tf.lgamma(x + 1) + x * tf.log(mu + eps) \
                                - x * tf.log(theta + mu + eps) + theta * tf.log(theta + eps) \
                                - theta * tf.log(theta + mu + eps)
    return tf.reduce_sum(res, axis=-1)
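Collecting terms (and ignoring the eps guards inside the logarithms), res is the negative binomial log-mass in its mean/inverse-dispersion parameterization, summed over the gene axis by the final reduce_sum:

\log p(x) = \log\Gamma(x + \theta) - \log\Gamma(\theta) - \log\Gamma(x + 1) + x \log\frac{\mu}{\theta + \mu} + \theta \log\frac{\theta}{\theta + \mu}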
Example #10
def chi2_log_prob(self, val):
  df = self.parameters['df']
  eta = 0.5 * df - 1
  result = eta * tf.log(val)
  result -= 0.5 * val
  result -= tf.lgamma(eta + 1) + (eta + 1) * tf.log(2.0)
  return result
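For reference, with \eta = df/2 - 1 this is the chi-square log-density:

\log p(x) = \eta \log x - \frac{x}{2} - (\eta + 1)\log 2 - \log\Gamma(\eta + 1)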
Example #11
def gamma_log_prob(self, val):
  conc = self.parameters['concentration']
  rate = self.parameters['rate']
  result = (conc - 1.0) * tf.log(val)
  result -= rate * val
  result += -tf.lgamma(conc) + conc * tf.log(rate)
  return result
Example #12
def inverse_gamma_log_prob(self, val):
  conc = self.parameters['concentration']
  rate = self.parameters['rate']
  result = -(conc + 1) * tf.log(val)
  result -= rate * tf.reciprocal(val)
  result += -tf.lgamma(conc) + conc * tf.log(rate)
  return result
Example #13
  def testPoissonLogPmfContinuousRelaxation(self):
    batch_size = 12
    lam = tf.constant([3.0] * batch_size)
    x = np.array([-3., -0.5, 0., 2., 2.2, 3., 3.1, 4., 5., 5.5, 6., 7.]).astype(
        np.float32)
    poisson = self._make_poisson(rate=lam,
                                 interpolate_nondiscrete=True)

    expected_continuous_log_pmf = (x * poisson.log_rate - tf.lgamma(1. + x)
                                   - poisson.rate)
    neg_inf = tf.fill(
        tf.shape(expected_continuous_log_pmf),
        value=np.array(-np.inf,
                       dtype=expected_continuous_log_pmf.dtype.as_numpy_dtype))
    expected_continuous_log_pmf = tf.where(x >= 0.,
                                           expected_continuous_log_pmf,
                                           neg_inf)
    expected_continuous_pmf = tf.exp(expected_continuous_log_pmf)

    log_pmf = poisson.log_prob(x)
    self.assertEqual(log_pmf.get_shape(), (batch_size,))
    self.assertAllClose(self.evaluate(log_pmf),
                        self.evaluate(expected_continuous_log_pmf))

    pmf = poisson.prob(x)
    self.assertEqual(pmf.get_shape(), (batch_size,))
    self.assertAllClose(self.evaluate(pmf),
                        self.evaluate(expected_continuous_pmf))
Example #14
File: poisson.py  Project: ibab/tensorprob
def Poisson(lambda_, name=None):
    k = tf.placeholder(config.int_dtype, name=name)

    # Poisson log-pmf: k * log(lambda) - lambda - log(k!); cast k since tf.lgamma expects floats
    k_float = tf.cast(k, config.dtype)
    Distribution.logp = k_float * tf.log(lambda_) - lambda_ - tf.lgamma(k_float + 1)

    # TODO Distribution.integral = ...

    return k
Example #15
 def actual_hypersphere_volume(dims, radius):
   # https://en.wikipedia.org/wiki/Volume_of_an_n-ball
   # Using tf.lgamma because we'd have to otherwise use SciPy which is not
   # a required dependency of core.
   radius = np.asarray(radius)
   dims = tf.cast(dims, dtype=radius.dtype)
   return tf.exp((dims / 2.) * np.log(np.pi) - tf.lgamma(1. + dims / 2.) +
                 dims * tf.log(radius))
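The expression is the log-space form of the n-ball volume formula from the linked article, using tf.lgamma in place of a SciPy gamma call:

\log V_n(r) = \frac{n}{2}\log\pi - \log\Gamma\left(1 + \frac{n}{2}\right) + n \log r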
Example #16
 def _log_unnormalized_prob(self, x):
   # The log-probability at negative points is always -inf.
   # Catch such x's and set the output value accordingly.
   safe_x = tf.maximum(x if self.interpolate_nondiscrete else tf.floor(x), 0.)
   y = safe_x * self.log_rate - tf.lgamma(1. + safe_x)
   is_supported = tf.broadcast_to(tf.equal(x, safe_x), tf.shape(y))
   neg_inf = tf.fill(tf.shape(y),
                     value=np.array(-np.inf, dtype=y.dtype.as_numpy_dtype))
   return tf.where(is_supported, y, neg_inf)
Example #17
 def variational_expectations(self, Fmu, Fvar, Y):
     if self.invlink is tf.exp:
         return (
             -self.shape * Fmu
             - tf.lgamma(self.shape)
             + (self.shape - 1.0) * tf.log(Y)
             - Y * tf.exp(-Fmu + Fvar / 2.0)
         )
     else:
         return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
Example #18
  def _log_normalization(self, name='log_normalization'):
    """Returns the log normalization of an LKJ distribution.

    Args:
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      log_z: A Tensor of the same shape and dtype as `concentration`, containing
        the corresponding log normalizers.
    """
    # The formula is from D. Lewandowski et al [1], p. 1999, from the
    # proof that eqs 16 and 17 are equivalent.
    with tf.name_scope('log_normalization_lkj', name, [self.concentration]):
      logpi = np.log(np.pi)
      ans = tf.zeros_like(self.concentration)
      for k in range(1, self.dimension):
        ans += logpi * (k / 2.)
        ans += tf.lgamma(self.concentration + (self.dimension - 1 - k) / 2.)
        ans -= tf.lgamma(self.concentration + (self.dimension - 1) / 2.)
      return ans
Example #19
def GumbelSoftmaxLogDensity(y, p, tau):
    # EPS = tf.constant(1e-10)
    k = tf.shape(y)[-1]
    k = tf.cast(k, tf.float32)
    # y = y + EPS
    # y = tf.divide(y, tf.reduce_sum(y, -1, keep_dims=True))
    y = normalize_to_unit_sum(y)
    sum_p_over_y = tf.reduce_sum(tf.divide(p, tf.pow(y, tau)), -1)
    logp = tf.lgamma(k)
    logp = logp + (k - 1) * tf.log(tau)
    logp = logp - k * tf.log(sum_p_over_y)
    # remaining Gumbel-Softmax (Concrete) density term: sum_i [log p_i - (tau + 1) * log y_i]
    logp = logp + tf.reduce_sum(tf.log(p) - (tau + 1.0) * tf.log(y), -1)
    return logp
Example #20
    def __init__(self, train_set, test_set, dictparam):
        super(Elosplit, self).__init__(train_set, test_set, dictparam)

        k = tf.constant(map(lambda x: float(x), range(10)))
        last_vect = tf.expand_dims(ToolBox.last_vector(10),0)
        win_vector = ToolBox.win_vector(10)

        # Define parameters
        self.elo_atk = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))
        self.elo_def = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))

        # Define the model
        for key, proxy in [('train', self.train_data), ('test', self.test_data)]:
            elo_atk_h = ToolBox.get_elomatch(proxy['team_h'], proxy['time'], self.elo_atk)
            elo_def_h = ToolBox.get_elomatch(proxy['team_h'], proxy['time'], self.elo_def)
            elo_atk_a = ToolBox.get_elomatch(proxy['team_a'], proxy['time'], self.elo_atk)
            elo_def_a = ToolBox.get_elomatch(proxy['team_a'], proxy['time'], self.elo_def)
            lambda_h = tf.expand_dims(tf.exp(self.param['goals_bias'] + elo_atk_h - elo_def_a), 1)
            lambda_a = tf.expand_dims(tf.exp(self.param['goals_bias'] + elo_atk_a - elo_def_h), 1)
            score_h = tf.exp(-lambda_h + tf.log(lambda_h) * k - tf.lgamma(k + 1))
            score_a = tf.exp(-lambda_a + tf.log(lambda_a) * k - tf.lgamma(k + 1))
            score_h += tf.matmul(tf.expand_dims((1. - tf.reduce_sum(score_h, reduction_indices=[1])), 1), last_vect)
            score_a += tf.matmul(tf.expand_dims((1. - tf.reduce_sum(score_a, reduction_indices=[1])), 1), last_vect)

            self.score[key] = tf.batch_matmul(tf.expand_dims(score_h, 2), tf.expand_dims(score_a, 1))
            self.res[key] = tf.reduce_sum(self.score[key] * win_vector, reduction_indices=[1,2])

        # Define the costs
        self.init_cost()
        for key in ['train', 'test']:
            for proxy in [self.elo_atk, self.elo_def]:
                cost = ToolBox.get_raw_elo_cost(self.param['metaparam0'], self.param['metaparam1'], proxy, self.nb_times)
                self.regulizer[key].append(cost)

                cost = ToolBox.get_timediff_elo_cost(self.param['metaparam2'], proxy, self.nb_times)
                self.regulizer[key].append(cost)

        # Finish the initialization
        super(Elosplit, self).finish_init()
Example #21
def log_zinb_positive(x, mu, theta, pi, eps=1e-8):
    """
    Log likelihood (scalar) of a minibatch according to a zero-inflated negative binomial (ZINB) model.

    Notes:
    We parametrize the Bernoulli using the logits, hence the softplus functions appearing below.
    
    Variables:
    mu: mean of the negative binomial (has to be positive support) (shape: minibatch x genes)
    theta: inverse dispersion parameter (has to be positive support) (shape: minibatch x genes)
    pi: logit of the dropout parameter (real support) (shape: minibatch x genes)
    eps: numerical stability constant
    """
    case_zero = tf.nn.softplus(- pi + theta * tf.log(theta + eps) - theta * tf.log(theta + mu + eps)) \
                                - tf.nn.softplus( - pi)
    case_non_zero = - pi - tf.nn.softplus(- pi) \
                                + theta * tf.log(theta + eps) - theta * tf.log(theta + mu + eps) \
                                + x * tf.log(mu + eps) - x * tf.log(theta + mu + eps) \
                                + tf.lgamma(x + theta) - tf.lgamma(theta) - tf.lgamma(x + 1)
    
    mask = tf.cast(tf.less(x, eps), tf.float32)
    res = tf.multiply(mask, case_zero) + tf.multiply(1 - mask, case_non_zero)
    return tf.reduce_sum(res, axis=-1)
Example #22
  def logp(self, bin_counts):
    """Compute the log probability for the counts in the bin, under the model.

    Args:
      bin_counts: array-like integer counts

    Returns:
      The log-probability under the Poisson models for each element of
      bin_counts.
    """
    k = tf.to_float(bin_counts)
    # log poisson(k, r) = log(r^k * e^(-r) / k!) = k log(r) - r - log k!
    # log poisson(k, r=exp(x)) = k * x - exp(x) - lgamma(k + 1)
    return k * self.logr - tf.exp(self.logr) - tf.lgamma(k + 1)
Example #23
  def logpmf(self, x, n, p):
    """Log of the probability mass function.

    Parameters
    ----------
    x : tf.Tensor
      A n-D tensor.
    n : int
      A tensor of same shape as ``x``, and with all elements
      constrained to :math:`n > 0`.
    p : tf.Tensor
      A tensor of same shape as ``x``, and with all elements
      constrained to :math:`p\in(0,1)`.

    Returns
    -------
    tf.Tensor
      A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    n = tf.cast(n, dtype=tf.float32)
    p = tf.cast(p, dtype=tf.float32)
    return tf.lgamma(x + n) - tf.lgamma(x + 1.0) - tf.lgamma(n) + \
        n * tf.log(p) + x * tf.log(1.0 - p)
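The lgamma terms form the negative binomial's generalized binomial coefficient, so the return value is

\log p(x \mid n, p) = \log\Gamma(x + n) - \log\Gamma(x + 1) - \log\Gamma(n) + n \log p + x \log(1 - p)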
Example #24
def Poisson(lambda_, name=None):
    k = tf.placeholder(config.int_dtype, name=name)

    # FIXME tf.lgamma only supports floats so cast before
    Distribution.logp = (
        tf.cast(k, config.dtype)*tf.log(lambda_) -
        lambda_ -
        tf.lgamma(tf.cast(k+1, config.dtype))
    )

    # TODO Distribution.integral = ...
    def integral(l, u):
        return tf.constant(1, dtype=config.dtype)
    Distribution.integral = integral

    return k
Example #25
 def logpdf(self, x, df):
   """Log of the probability density function.
   Parameters
   ----------
   x : tf.Tensor
     A n-D tensor.
   df : tf.Tensor
     A tensor of same shape as ``x``, and with all elements
     constrained to :math:`df > 0`.
   Returns
   -------
   tf.Tensor
     A tensor of same shape as input.
   """
   x = tf.cast(x, dtype=tf.float32)
   df = tf.cast(df, dtype=tf.float32)
   return (0.5 * df - 1) * tf.log(x) - 0.5 * x - \
       0.5 * df * tf.log(2.0) - tf.lgamma(0.5 * df)
Example #26
    def logpmf(self, x, mu):
        """Log of the probability mass function.

        Parameters
        ----------
        x : tf.Tensor
            A n-D tensor.
        mu : tf.Tensor
            A tensor of same shape as ``x``, and with all elements
            constrained to :math:`mu > 0`.

        Returns
        -------
        tf.Tensor
            A tensor of same shape as input.
        """
        x = tf.cast(x, dtype=tf.float32)
        mu = tf.cast(mu, dtype=tf.float32)
        return x * tf.log(mu) - mu - tf.lgamma(x + 1.0)
Example #27
 def _log_normalization(self):
   """Computes the log-normalizer of the distribution."""
   event_dim = self.event_shape[0].value
   if event_dim is None:
     raise ValueError('vMF _log_normalizer currently only supports '
                      'statically known event shape')
   safe_conc = tf.where(self.concentration > 0,
                        self.concentration,
                        tf.ones_like(self.concentration))
   safe_lognorm = ((event_dim / 2 - 1) * tf.log(safe_conc) -
                   (event_dim / 2) * np.log(2 * np.pi) -
                   tf.log(_bessel_ive(event_dim / 2 - 1, safe_conc)) -
                   tf.abs(safe_conc))
   log_nsphere_surface_area = (
       np.log(2.) + (event_dim / 2) * np.log(np.pi) -
       tf.lgamma(tf.cast(event_dim / 2, self.dtype)))
   return tf.where(self.concentration > 0,
                   -safe_lognorm,
                   log_nsphere_surface_area * tf.ones_like(safe_lognorm))
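In the concentration-to-zero branch the vMF density becomes uniform on the sphere, so its normalizer reduces to the unit-sphere surface area, which is exactly the log 2 + (d/2) log \pi - lgamma(d/2) term above (d = event_dim):

\log A_{d-1} = \log 2 + \frac{d}{2}\log\pi - \log\Gamma\left(\frac{d}{2}\right)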
Example #28
    def entropy(self, a, scale=1):
        """Entropy of probability distribution.

        Parameters
        ----------
        a : tf.Tensor
            **Shape** parameter. A n-D tensor with all elements
            constrained to :math:`a > 0`.
        scale : tf.Tensor
            **Scale** parameter. A n-D tensor with all elements
            constrained to :math:`scale > 0`.

        Returns
        -------
        tf.Tensor
            A tensor of same shape as input.
        """
        a = tf.cast(a, dtype=tf.float32)
        scale = tf.cast(scale, dtype=tf.float32)
        return a + tf.log(scale*tf.exp(tf.lgamma(a))) - \
               (1.0 + a) * tf.digamma(a)
Example #29
    def logpdf(self, x, a, scale=1):
        """Log of the probability density function.

        Parameters
        ----------
        x : tf.Tensor
            A n-D tensor.
        a : tf.Tensor
            **Shape** parameter. A tensor of same shape as ``x``, and with
            all elements constrained to :math:`a > 0`.
        scale : tf.Tensor
            **Scale** parameter. A tensor of same shape as ``x``, and with
            all elements constrained to :math:`scale > 0`.

        Returns
        -------
        tf.Tensor
            A tensor of same shape as input.
        """
        x = tf.cast(x, dtype=tf.float32)
        a = tf.cast(a, dtype=tf.float32)
        scale = tf.cast(scale, dtype=tf.float32)
        return (a - 1.0) * tf.log(x) - x/scale - a * tf.log(scale) - tf.lgamma(a)
Example #30
 def _log_prob(self, x):
   x = self._assert_valid_sample(x)
   # broadcast logits or x if need be.
   logits = self.logits
   if (not x.shape.is_fully_defined() or
       not logits.shape.is_fully_defined() or
       x.shape != logits.shape):
     logits = tf.ones_like(x, dtype=logits.dtype) * logits
     x = tf.ones_like(logits, dtype=x.dtype) * x
   logits_shape = tf.shape(tf.reduce_sum(logits, axis=[-1]))
   logits_2d = tf.reshape(logits, [-1, self.event_size])
   x_2d = tf.reshape(x, [-1, self.event_size])
   # compute the normalization constant
   k = tf.cast(self.event_size, x.dtype)
   log_norm_const = (tf.lgamma(k) + (k - 1.) * tf.log(self.temperature))
   # compute the unnormalized density
   log_softmax = tf.nn.log_softmax(logits_2d - x_2d * self._temperature_2d)
   log_unnorm_prob = tf.reduce_sum(log_softmax, [-1], keepdims=False)
   # combine unnormalized density with normalization constant
   log_prob = log_norm_const + log_unnorm_prob
   # Reshapes log_prob to be consistent with shape of user-supplied logits
   ret = tf.reshape(log_prob, logits_shape)
   return ret
Example #31
def gamma(x, shape, scale):
    return -shape * tf.log(scale) - tf.lgamma(shape)\
        + (shape - 1.) * tf.log(x) - x / scale
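This is the Gamma log-density in shape/scale form, with k = shape and \theta = scale:

\log p(x) = -k \log\theta - \log\Gamma(k) + (k - 1)\log x - \frac{x}{\theta}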
Example #32
 def _log_normalization(self):
   return (tf.log(tf.abs(self.scale)) +
           0.5 * tf.log(self.df) +
           0.5 * np.log(np.pi) +
           tf.lgamma(0.5 * self.df) -
           tf.lgamma(0.5 * (self.df + 1.)))
Example #33
 def _entropy(self):
     return (self.concentration + tf.log(self.rate) +
             tf.lgamma(self.concentration) -
             ((1. + self.concentration) * tf.digamma(self.concentration)))
Example #34
 def log_q(self, z, alpha, beta):
     return (alpha - 1) * tf.log(z) - beta * z + alpha * tf.log(
         beta) - tf.lgamma(alpha)
Example #35
def GO_Gamma_v2(x, alpha):
    # x sim Gamma(alpha, 1)
    x = tf.cast(x, tf.float64)
    alpha = tf.cast(alpha, tf.float64)

    logx = tf.log(x)
    ex_gamma_xa = tf.exp(x + tf.lgamma(alpha) + (1. - alpha) * logx)
    psi_m_log = tf.digamma(alpha + 1.) - logx
    igamma_up_reg = tf.igammac(alpha, x)

    # Part 1
    indx1 = tf.where(x <= 1e-2)
    x_indx1 = tf.gather_nd(x, indx1)
    alpha_indx1 = tf.gather_nd(alpha, indx1)
    GO_Gamma_alpha_value1 = tf.exp(x_indx1) * x_indx1 / alpha_indx1 * (
            tf.gather_nd(psi_m_log, indx1) +
            x_indx1 / tf.pow(alpha_indx1 + 1., 2) -
            tf.pow(x_indx1 / (alpha_indx1 + 2.), 2) +
            0.5 * tf.pow(x_indx1, 3) / tf.pow(alpha_indx1 + 3., 2)
    )

    # Part 2
    N_alpha = tf.round(tf.exp(
        - 0.488484605941243044124888683654717169702053070068359375 * tf.log(alpha)
        + 1.6948389987594634220613443176262080669403076171875
    ))
    indx2 = tf.where(tf.logical_and(
        tf.logical_and(x > 1e-2, alpha <= 3.),
        (x <= (alpha + N_alpha * tf.sqrt(alpha)))
    ))
    KK = 15
    kk = tf.cast(tf.range(1, KK + 1), tf.float64)
    x_indx2 = tf.gather_nd(x, indx2)
    alpha_indx2 = tf.gather_nd(alpha, indx2)
    GO_Gamma_alpha_value2 = tf.gather_nd(ex_gamma_xa, indx2) * (
            -tf.gather_nd(psi_m_log, indx2) * tf.gather_nd(igamma_up_reg, indx2) +
            (
                    tf.digamma(alpha_indx2 + KK + 1.) - tf.gather_nd(logx, indx2) -
                    tf.reduce_sum(
                        tf.igammac(tf.expand_dims(alpha_indx2, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_indx2, 1)) /
                        (tf.expand_dims(alpha_indx2, 1) + tf.expand_dims(kk, 0))
                        , 1)
            )
    )

    # Part 2_1
    indx2_1 = tf.where(tf.logical_and(
        tf.logical_and(x > 1e-2, alpha <= 3.),
        (x > (alpha + N_alpha * tf.sqrt(alpha)))
    ))
    KK = 15
    kk = tf.cast(tf.range(1, KK + 1), tf.float64)
    x_indx2_1 = tf.gather_nd(x, indx2_1)
    alpha_indx2_1 = tf.gather_nd(alpha, indx2_1)
    GO_Gamma_alpha_value2_1 = tf.gather_nd(ex_gamma_xa, indx2_1) * (
            -tf.gather_nd(psi_m_log, indx2_1) * tf.gather_nd(igamma_up_reg, indx2_1) +
            (
                    tf.digamma(alpha_indx2_1 + KK + 1.) - tf.gather_nd(logx, indx2_1) -
                    tf.reduce_sum(
                        tf.igammac(tf.expand_dims(alpha_indx2_1, 1) + tf.expand_dims(kk, 0),
                                   tf.expand_dims(x_indx2_1, 1)) /
                        (tf.expand_dims(alpha_indx2_1, 1) + tf.expand_dims(kk, 0))
                        , 1)
            )
    )
    GO_Gamma_alpha_value2_1 = tf.maximum(
        GO_Gamma_alpha_value2_1,
        1. / alpha_indx2_1 - tf.gather_nd(ex_gamma_xa, indx2_1) *
        tf.gather_nd(psi_m_log, indx2_1) * tf.gather_nd(igamma_up_reg, indx2_1)
    )

    # Part 3
    indx3 = tf.where(
        tf.logical_and(
            tf.logical_and(x > 1e-2, alpha > 3.),
            alpha <= 500.
        )
    )
    KK = 10
    kk = tf.cast(tf.range(1, KK + 1), tf.float64)
    x_indx3 = tf.gather_nd(x, indx3)
    alpha_indx3 = tf.gather_nd(alpha, indx3)

    x_l = alpha_indx3 - tf.log(alpha_indx3) * tf.sqrt(alpha_indx3)

    logx_l = tf.log(x_l)
    ex_gamma_xa_l = tf.exp(x_l + tf.lgamma(alpha_indx3) + (1. - alpha_indx3) * logx_l)
    psi_m_log_l = tf.digamma(alpha_indx3 + 1.) - logx_l
    igamma_low_reg_l = tf.igamma(alpha_indx3, x_l)
    # igamma_up_reg_l = tf.igammac(alpha_indx3, x_l)
    # f_l = ex_gamma_xa_l * (
    #         -psi_m_log_l * igamma_up_reg_l +
    #         (tf.digamma(alpha_indx3 + KK + 1.) - logx_l -
    #         tf.reduce_sum(
    #             tf.igammac(tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_l, 1)) /
    #             (tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0))
    #             , 1))
    # )
    f_l = ex_gamma_xa_l * (
            psi_m_log_l * igamma_low_reg_l +
            tf.reduce_sum(
                tf.igamma(tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_l, 1)) /
                (tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0))
                , 1)
    )

    g_l = (1. + (1. - alpha_indx3) / x_l) * f_l + (
            -ex_gamma_xa_l / x_l * igamma_low_reg_l + (psi_m_log_l +
                                                       tf.reduce_sum(
                                                           tf.exp(
                                                               tf.expand_dims(kk, 0) * tf.log(tf.expand_dims(x_l, 1)) +
                                                               tf.lgamma(tf.expand_dims(alpha_indx3, 1)) -
                                                               tf.lgamma(
                                                                   tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk,
                                                                                                                   0) + 1.)
                                                           )
                                                           , 1))
    )

    x_m = alpha_indx3

    f_m = 1. + 0.167303227226226980395296095593948848545551300048828125 / \
          (
                  tf.pow(x_m, 1.0008649793164192676186985409003682434558868408203125) -
                  0.07516433982238841793321881823430885560810565948486328125
          )

    x_r = 2. * alpha_indx3 - x_l

    f_r = 1. / alpha_indx3 - tf.exp(x_r + tf.lgamma(alpha_indx3) + (1. - alpha_indx3) * tf.log(x_r)) * (
            (tf.digamma(alpha_indx3 + 1.) - tf.log(x_r)) * tf.igammac(alpha_indx3, x_r)
    )
    lambda_r = tf.exp(
        959.627335718427275423891842365264892578125 / (
                tf.pow(alpha_indx3, 1.324768828487964622553363369661383330821990966796875) +
                142.427456986662718918523751199245452880859375
        )
        - 13.01439996187340142341781756840646266937255859375
    )

    x_mat_i = tf.concat([tf.expand_dims(x_l, 1), tf.expand_dims(x_m, 1), tf.expand_dims(x_r, 1)], 1)
    x_mat_bar_i = x_mat_i - tf.expand_dims(alpha_indx3, 1)
    x_mat_hat_i = tf.sqrt(x_mat_i) - tf.sqrt(tf.expand_dims(alpha_indx3, 1))
    f_mat_i = tf.concat([tf.expand_dims(f_l, 1), tf.expand_dims(f_m, 1), tf.expand_dims(f_r, 1)], 1)
    lambda_mat_i = tf.concat([tf.expand_dims(tf.ones_like(alpha_indx3), 1),
                              tf.expand_dims(tf.ones_like(alpha_indx3), 1),
                              tf.expand_dims(lambda_r, 1)
                              ], 1)

    x_mat_j = tf.expand_dims(x_l, 1)
    g_mat_j = tf.expand_dims(g_l, 1)
    lambda_mat_j = tf.expand_dims(tf.ones_like(alpha_indx3), 1)

    A = tf.reduce_sum(lambda_mat_i * tf.pow(x_mat_bar_i, 2), 1) + tf.reduce_sum(lambda_mat_j, 1)
    B = tf.reduce_sum(lambda_mat_i * x_mat_bar_i * x_mat_hat_i, 1) + \
        tf.reduce_sum(lambda_mat_j / 2. / tf.sqrt(x_mat_j), 1)
    C = tf.reduce_sum(lambda_mat_i * x_mat_bar_i, 1)
    D = tf.reduce_sum(lambda_mat_i * tf.pow(x_mat_hat_i, 2), 1) + tf.reduce_sum(lambda_mat_j / 4. / x_mat_j, 1)
    E = tf.reduce_sum(lambda_mat_i * x_mat_hat_i, 1)
    F = tf.reduce_sum(lambda_mat_i, 1)
    G = tf.reduce_sum(lambda_mat_i * x_mat_bar_i * f_mat_i, 1) + tf.reduce_sum(lambda_mat_j * g_mat_j, 1)
    H = tf.reduce_sum(lambda_mat_i * x_mat_hat_i * f_mat_i, 1) + \
        tf.reduce_sum(lambda_mat_j / 2. / tf.sqrt(x_mat_j) * g_mat_j, 1)
    I = tf.reduce_sum(lambda_mat_i * f_mat_i, 1)

    Z = F * tf.pow(B, 2) - 2. * B * C * E + D * tf.pow(C, 2) + A * tf.pow(E, 2) - A * D * F

    a_cor = 1. / Z * (G * (tf.pow(E, 2) - D * F) + H * (B * F - C * E) - I * (B * E - C * D))
    b_cor = 1. / Z * (G * (B * F - C * E) + H * (tf.pow(C, 2) - A * F) - I * (B * C - A * E))
    c_cor = 1. / Z * (-G * (B * E - C * D) + I * (tf.pow(B, 2) - A * D) - H * (B * C - A * E))

    GO_Gamma_alpha_value3 = a_cor * (x_indx3 - alpha_indx3) + b_cor * (tf.sqrt(x_indx3) - tf.sqrt(alpha_indx3)) + c_cor
    GO_Gamma_alpha_value3 = tf.maximum(
        GO_Gamma_alpha_value3,
        1. / alpha_indx3 - tf.gather_nd(ex_gamma_xa, indx3) *
        tf.gather_nd(psi_m_log, indx3) * tf.gather_nd(igamma_up_reg, indx3)
    )

    # Part 4
    # indx4 = tf.where(
    #     tf.logical_and(
    #         tf.logical_and(x > 1e-2, alpha > 500.),
    #         (x <= (alpha + 2. * tf.log(alpha) * tf.sqrt(alpha)))
    #     )
    # )
    indx4 = tf.where(
        tf.logical_and(x > 1e-2, alpha > 500.)
    )

    x_indx4 = tf.gather_nd(x, indx4)
    alpha_indx4 = tf.gather_nd(alpha, indx4)

    f_m_large = 1. + 0.167303227226226980395296095593948848545551300048828125 / \
                (
                        tf.pow(alpha_indx4, 1.0008649793164192676186985409003682434558868408203125) -
                        0.07516433982238841793321881823430885560810565948486328125
                )
    g_m_large = 0.54116502161502622048061539317131973803043365478515625 * \
                tf.pow(alpha_indx4, -1.010274491769996618728555404231883585453033447265625)

    GO_Gamma_alpha_value4 = f_m_large + g_m_large * (x_indx4 - alpha_indx4)

    # Part 4_1
    # indx4_1 = tf.where(
    #     tf.logical_and(
    #         tf.logical_and(x > 1e-2, alpha > 500.),
    #         (x > (alpha + 2. * tf.log(alpha) * tf.sqrt(alpha)))
    #     )
    # )
    # alpha_indx4_1 = tf.gather_nd(alpha, indx4_1)
    # GO_Gamma_alpha_value4_1 = 1. / alpha_indx4_1 - tf.gather_nd(ex_gamma_xa, indx4_1) * \
    #     tf.gather_nd(psi_m_log, indx4_1) * tf.gather_nd(igamma_up_reg, indx4_1)

    # Summarize
    GO_Gamma_alpha = tf.sparse_to_dense(indx1, x.shape, GO_Gamma_alpha_value1) + \
                     tf.sparse_to_dense(indx2, x.shape, GO_Gamma_alpha_value2) + \
                     tf.sparse_to_dense(indx2_1, x.shape, GO_Gamma_alpha_value2_1) + \
                     tf.sparse_to_dense(indx3, x.shape, GO_Gamma_alpha_value3) + \
                     tf.sparse_to_dense(indx4, x.shape, GO_Gamma_alpha_value4)
    # + \
    # tf.sparse_to_dense(indx4_1, x.shape, GO_Gamma_alpha_value4_1)

    GO_Gamma_alpha = tf.cast(GO_Gamma_alpha, tf.float32)

    return GO_Gamma_alpha  # , x_l, x_r, f_l, f_m, f_r, g_l
Example #36
    def _verifySampleAndPdfConsistency(self, vmf, rtol=0.075):
        """Verifies samples are consistent with the PDF using importance sampling.

    In particular, we verify an estimate the surface area of the n-dimensional
    hypersphere, and the surface areas of the spherical caps demarcated by
    a handful of survival rates.

    Args:
      vmf: A `VonMisesFisher` distribution instance.
      rtol: Relative difference tolerable.
    """
        dim = tf.dimension_value(vmf.event_shape[-1])
        nsamples = 50000
        samples = vmf.sample(sample_shape=[nsamples])
        samples = tf.check_numerics(samples, 'samples')
        log_prob = vmf.log_prob(samples)
        log_prob = tf.check_numerics(log_prob, 'log_prob')
        log_importance = -log_prob
        sphere_surface_area_estimate, samples, importance, conc = self.evaluate(
            [
                tf.exp(
                    tf.reduce_logsumexp(log_importance, axis=0) -
                    tf.log(tf.to_float(nsamples))), samples,
                tf.exp(log_importance), vmf.concentration
            ])
        true_sphere_surface_area = 2 * (np.pi)**(dim / 2) * self.evaluate(
            tf.exp(-tf.lgamma(dim / 2)))
        # Broadcast to correct size
        true_sphere_surface_area += np.zeros_like(sphere_surface_area_estimate)
        # Highly concentrated distributions do not get enough coverage to provide
        # a reasonable full-sphere surface area estimate. These are covered below
        # by CDF-based hypersphere cap surface area estimates.
        self.assertAllClose(true_sphere_surface_area[np.where(conc < 3)],
                            sphere_surface_area_estimate[np.where(conc < 3)],
                            rtol=rtol)

        # Assert surface area of hyperspherical cap For some CDFs in [.05,.45],
        # (h must be greater than 0 for the hypersphere cap surface area
        # calculation to hold).
        for survival_rate in 0.95, .9, .75, .6:
            cdf = (1 - survival_rate)
            mean_dir = self.evaluate(vmf.mean_direction)
            dotprods = np.sum(samples * mean_dir, -1)
            # Empirical estimate of the effective dot-product of the threshold that
            # selects for a given CDF level, that is the cosine of the largest
            # passable angle, or the minimum cosine for a within-CDF sample.
            dotprod_thresh = np.percentile(dotprods,
                                           100 * survival_rate,
                                           axis=0,
                                           keepdims=True)
            dotprod_above_thresh = np.float32(dotprods > dotprod_thresh)
            sphere_cap_surface_area_ests = (
                cdf * (importance * dotprod_above_thresh).sum(0) /
                dotprod_above_thresh.sum(0))
            h = (1 - dotprod_thresh)
            self.assertGreaterEqual(h.min(),
                                    0)  # h must be >= 0 for the eqn below
            true_sphere_cap_surface_area = (
                0.5 * true_sphere_surface_area *
                self.evaluate(tf.betainc((dim - 1) / 2, 0.5, 2 * h - h**2)))
            if dim == 3:  # For 3-d we have a simpler form we can double-check.
                self.assertAllClose(2 * np.pi * h,
                                    true_sphere_cap_surface_area)

            self.assertAllClose(true_sphere_cap_surface_area,
                                sphere_cap_surface_area_ests +
                                np.zeros_like(true_sphere_cap_surface_area),
                                rtol=rtol)
Example #37
File: lik_mcpm_gp.py  Project: zcmail/MCPM
    def log_cond_prob(self, num_samples, outputs, latent_samples, sample_means,
                      sample_vars, weights_samples, means_w, covars_weights,
                      *args):

        full_covar = init_list(0.0, [self.num_latent])

        for q in range(self.num_latent):
            covar_input = covars_weights[q, :, :]
            full_covar[q] = tf.matmul(covar_input, tf.transpose(covar_input))
        full_covar = tf.stack(full_covar)

        ### NOTE
        ### USE WITH TRUNCATION
        # Improve identifiability
        # full_covar = tf.matrix_band_part(full_covar, -1, 0)
        # means_w = tf.matrix_band_part(means_w, -1, 0)

        #cov_diagonal = tf.matrix_band_part(tf.transpose(tf.matrix_diag_part(full_covar)), -1, 0)
        #var_w = cov_diagonal

        means_w = tf.transpose(means_w)
        cov_diagonal = tf.transpose(tf.matrix_diag_part(full_covar))
        var_w = cov_diagonal

        prediction = init_list(0.0, [self.num_tasks])

        outputs_no_missing = tf.boolean_mask(outputs,
                                             self.ytrain_non_missing_index)
        product_exp = tf.transpose(
            tf.matmul(means_w, tf.transpose(sample_means)) + self.offsets)

        means_w = tf.expand_dims(means_w, axis=2),
        var_w = tf.expand_dims(var_w, axis=2)
        sample_means = tf.transpose(tf.tile(
            tf.expand_dims(sample_means, axis=0), [self.num_tasks, 1, 1]),
                                    perm=[0, 2, 1])
        sample_vars = tf.transpose(tf.tile(tf.expand_dims(sample_vars, axis=0),
                                           [self.num_tasks, 1, 1]),
                                   perm=[0, 2, 1])

        first_term = tf.reduce_prod(tf.exp(
            (tf.multiply(means_w, sample_means)[0] +
             (tf.multiply(tf.square(means_w), sample_vars)[0] +
              tf.multiply(var_w, tf.square(sample_means))) / 2) /
            ((1.0 - tf.multiply(var_w, sample_vars)))),
                                    axis=1)
        second_term = tf.reduce_prod(
            1.0 / tf.sqrt(1.0 - tf.multiply(var_w, sample_vars)), axis=1)

        prediction = tf.transpose(first_term * second_term *
                                  tf.exp(self.offsets))

        prediction_no_missing = tf.boolean_mask(prediction,
                                                self.ytrain_non_missing_index)
        product_exp_no_missing = tf.boolean_mask(product_exp,
                                                 self.ytrain_non_missing_index)

        log_lik = product_exp_no_missing * outputs_no_missing - prediction_no_missing - tf.lgamma(
            outputs_no_missing + 1.0)

        ell = tf.reduce_sum(log_lik)

        return (ell, sample_means, sample_vars, means_w, var_w, self.offsets)
Example #38
 def test_Lgamma(self):
     t = tf.lgamma(self.random(4, 3))
     self.check(t)
Example #39
 def _log_normalization(self):
     return (tf.lgamma(self.concentration) + 0.5 * math.log(2. * math.pi) -
             self.concentration * tf.log(self.rate) -
             0.5 * tf.log(self._lambda))
Example #40
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, dropout_in):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    init_min = 0.1
    init_max = 0.1
    init_min = (np.log(init_min) - np.log(1. - init_min))
    init_max = (np.log(init_max) - np.log(1. - init_max))

    dropout_a =  tf.get_variable(name='dropout',
                 shape=None,
                 initializer=tf.random_uniform(
                     (1,),
                     init_min,
                     init_max),
                 dtype=tf.float32,
                 trainable=True)

    dropout_p = tf.nn.sigmoid(dropout_a[0])

    eps = 1e-7
    temp = 0.1

    unif_noise = tf.random_uniform(shape=[batch_size,4])
    drop_prob = (
            tf.log(dropout_p + eps)
            - tf.log(1. - dropout_p + eps)
            + tf.log(unif_noise + eps)
            - tf.log(1. - unif_noise + eps)
    )
    drop_prob = tf.nn.sigmoid(drop_prob / temp)
    random_tensor = 1. - drop_prob
    retain_prob = 1. - dropout_p
    dropout_samples = random_tensor / retain_prob

    dropout_regularizer = dropout_p * tf.log(dropout_p)
    dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p)
    dropout_regularizer *= dropout_regularizer *10 * -1
    dropout_regularizer = tf.clip_by_value(dropout_regularizer,-10,0)

    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    mix_dropout1 = dropout_samples[:,0:1]*mix_samples[:,0:1]
    mix_dropout2 = dropout_samples[:,1:2]*mix_samples[:,1:2]
    mix_dropout3 = dropout_samples[:,2:3]*mix_samples[:,2:3]
    mix_dropout4 = dropout_samples[:,3:4]*mix_samples[:,3:4]

    sum1 = mix_dropout1+mix_dropout2+mix_dropout3+mix_dropout4
    mix_dropout1 = mix_dropout1/sum1
    mix_dropout2 = mix_dropout2/sum1
    mix_dropout3 = mix_dropout3/sum1
    mix_dropout4 = mix_dropout4/sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)

    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")

    # dropout out
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4

    y = y1 + y2 + y3 + y4
    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final")
    y = output

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1,
                                scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2,
                                scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3,
                                scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4,
                                scale=v4)

    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))

    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # loss
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)

    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4))
    
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = (tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1))
         + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r)
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2

    loss = -ELBO + kl * p1 + p4 * dHSIC_Value + dropout_regularizer

    z = z1_samples
    return y, z, loss, -marginal_likelihood, dropout_regularizer,dropout_p,dropout_samples
Example #41
def negative_binomial(m, Y, alpha):
    k = 1 / alpha
    return tf.lgamma(k + Y) - tf.lgamma(Y + 1) - tf.lgamma(k) + Y * tf.log(
        m / (m + k)) - k * tf.log(1 + m * alpha)
Example #42
def poisson(lamb, y):
    return y * tf.log(lamb) - lamb - tf.lgamma(y + 1.)
Example #43
 def variational_expectations(self, Fmu, Fvar, Y):
     if self.invlink is tf.exp:
         return -self.shape * Fmu - tf.lgamma(self.shape) \
             + (self.shape - 1.) * tf.log(Y) - Y * tf.exp(-Fmu + Fvar / 2.)
     else:
         return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
Example #44
File: likelihoods.py  Project: zphilip/MFVI
 def variational_expectations(self, Fmu, Fvar, Y):
     if self.invlink is tf.exp:
         return Y * Fmu - tf.exp(Fmu + Fvar / 2) - tf.lgamma(Y + 1)
     else:
         return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
Example #45
 def _log_normalization(self):
     return (tf.lgamma(self.concentration) -
             self.concentration * tf.log(self.rate))
Example #46
File: util.py  Project: chitrita/deepimpute
def poisson_loss(y_true, y_pred):
    # mask = tf.cast(y_true>0,tf.float32)
    y_true = y_true + 0.001
    NLL = tf.lgamma(y_pred + 1) - y_pred * tf.log(y_true)
    return tf.reduce_mean(NLL)
Example #47
def mean_poisson_log_likelihood(y_true, y_pred):
    poisson = y_pred - y_true * K.log(y_pred + K.epsilon()) + lgamma(y_true + 1)
    return K.mean(poisson, axis=-1)
Example #48
    def bezier_net(self, inputs, n_points, bezier_degree):

        depth_cpw = 32 * 8
        dim_cpw = int((bezier_degree + 1) / 8)
        kernel_size = (4, 3)
        #        noise_std = 0.01

        cpw = tf.layers.dense(inputs, 1024)
        cpw = tf.layers.batch_normalization(cpw, momentum=0.9)
        cpw = tf.nn.leaky_relu(cpw, alpha=0.2)

        cpw = tf.layers.dense(cpw, dim_cpw * 3 * depth_cpw)
        cpw = tf.layers.batch_normalization(cpw, momentum=0.9)
        cpw = tf.nn.leaky_relu(cpw, alpha=0.2)
        cpw = tf.reshape(cpw, (-1, dim_cpw, 3, depth_cpw))

        cpw = tf.layers.conv2d_transpose(cpw,
                                         int(depth_cpw / 2),
                                         kernel_size,
                                         strides=(2, 1),
                                         padding='same')
        cpw = tf.layers.batch_normalization(cpw, momentum=0.9)
        cpw = tf.nn.leaky_relu(cpw, alpha=0.2)
        #        cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std)

        cpw = tf.layers.conv2d_transpose(cpw,
                                         int(depth_cpw / 4),
                                         kernel_size,
                                         strides=(2, 1),
                                         padding='same')
        cpw = tf.layers.batch_normalization(cpw, momentum=0.9)
        cpw = tf.nn.leaky_relu(cpw, alpha=0.2)
        #        cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std)

        cpw = tf.layers.conv2d_transpose(cpw,
                                         int(depth_cpw / 8),
                                         kernel_size,
                                         strides=(2, 1),
                                         padding='same')
        cpw = tf.layers.batch_normalization(cpw, momentum=0.9)
        cpw = tf.nn.leaky_relu(cpw, alpha=0.2)
        #        cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std)

        # Control points
        cp = tf.layers.conv2d(
            cpw, 1, (1, 2),
            padding='valid')  # batch_size x (bezier_degree+1) x 2 x 1
        cp = tf.nn.tanh(cp)
        cp = tf.squeeze(
            cp, axis=-1,
            name='control_point')  # batch_size x (bezier_degree+1) x 2

        # Weights
        w = tf.layers.conv2d(cpw, 1, (1, 3), padding='valid')
        w = tf.nn.sigmoid(w)  # batch_size x (bezier_degree+1) x 1 x 1
        w = tf.squeeze(w, axis=-1,
                       name='weight')  # batch_size x (bezier_degree+1) x 1

        # Parameters at data points
        db = tf.layers.dense(inputs, 1024)
        db = tf.layers.batch_normalization(db, momentum=0.9)
        db = tf.nn.leaky_relu(db, alpha=0.2)

        db = tf.layers.dense(db, 256)
        db = tf.layers.batch_normalization(db, momentum=0.9)
        db = tf.nn.leaky_relu(db, alpha=0.2)

        db = tf.layers.dense(db, n_points - 1)
        db = tf.nn.softmax(db)  # batch_size x (n_data_points-1)

        ub = tf.pad(db, [[0, 0], [1, 0]],
                    constant_values=0)  # batch_size x n_data_points
        ub = tf.cumsum(ub, axis=1)
        ub = tf.minimum(ub, 1)
        ub = tf.expand_dims(ub, axis=-1)  # 1 x n_data_points x 1

        # Bezier layer
        # Compute values of basis functions at data points
        num_control_points = bezier_degree + 1
        lbs = tf.tile(ub, [1, 1, num_control_points
                           ])  # batch_size x n_data_points x n_control_points
        pw1 = tf.range(0, num_control_points, dtype=tf.float32)
        pw1 = tf.reshape(pw1, [1, 1, -1])  # 1 x 1 x n_control_points
        pw2 = tf.reverse(pw1, axis=[-1])
        lbs = tf.add(tf.multiply(pw1, tf.log(lbs + EPSILON)),
                     tf.multiply(pw2, tf.log(1 - lbs + EPSILON))
                     )  # batch_size x n_data_points x n_control_points
        lc = tf.add(tf.lgamma(pw1 + 1), tf.lgamma(pw2 + 1))
        lc = tf.subtract(
            tf.lgamma(tf.cast(num_control_points, dtype=tf.float32)),
            lc)  # 1 x 1 x n_control_points
        lbs = tf.add(lbs, lc)  # batch_size x n_data_points x n_control_points
        bs = tf.exp(lbs)
        # Compute data points
        cp_w = tf.multiply(cp, w)
        dp = tf.matmul(bs, cp_w)  # batch_size x n_data_points x 2
        bs_w = tf.matmul(bs, w)  # batch_size x n_data_points x 1
        dp = tf.div(dp, bs_w)  # batch_size x n_data_points x 2
        dp = tf.expand_dims(
            dp, axis=-1, name='x_fake')  # batch_size x n_data_points x 2 x 1

        return dp, cp, w
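The lbs/lc lines evaluate the Bernstein basis in log space: with parameter u and degree n = bezier_degree, the log binomial coefficient \log\binom{n}{i} = \log\Gamma(n + 1) - \log\Gamma(i + 1) - \log\Gamma(n - i + 1) is added to the log powers before exponentiating:

\log B_{i,n}(u) = \log\binom{n}{i} + i \log u + (n - i)\log(1 - u)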
Example #49
def log_gamma(x, a1, b1):
    yout = tf.reduce_sum(
        a1 * tf.log(b1) - tf.lgamma(a1)
        + (a1 - 1.) * tf.log(x) - b1 * x
    )
    return yout
Example #50
    def build_loss(self, seqs_repr, data_ops):
        """Convert per-location real-valued predictions to a loss."""
        # targets
        tstart = self.batch_buffer // self.target_pool
        tend = (self.batch_length - self.batch_buffer) // self.target_pool

        targets = data_ops['label']
        targets = tf.identity(targets[:, tstart:tend, :], name='targets_op')

        # work-around for specifying my own predictions
        self.preds_adhoc = tf.placeholder(tf.float32,
                                          shape=seqs_repr.shape,
                                          name='preds-adhoc')

        # choose link
        if self.link in ['identity', 'linear']:
            self.preds_op = tf.identity(seqs_repr, name='preds')

        elif self.link == 'relu':
            self.preds_op = tf.relu(seqs_repr, name='preds')

        elif self.link == 'exp':
            self.preds_op = tf.exp(tf.clip_by_value(seqs_repr, -50, 50),
                                   name='preds')

        elif self.link == 'exp_linear':
            self.preds_op = tf.where(seqs_repr > 0,
                                     seqs_repr + 1,
                                     tf.exp(
                                         tf.clip_by_value(seqs_repr, -50, 50)),
                                     name='preds')

        elif self.link == 'softplus':
            self.preds_op = tf.nn.softplus(seqs_repr, name='preds')

        elif self.link == 'softmax':
            # performed in the loss function, but saving probabilities
            self.preds_prob = tf.nn.softmax(seqs_repr, name='preds')

        else:
            print('Unknown link function %s' % self.link, file=sys.stderr)
            exit(1)

        # clip
        if self.target_clip is not None:
            self.preds_op = tf.clip_by_value(self.preds_op, 0,
                                             self.target_clip)
            targets = tf.clip_by_value(targets, 0, self.target_clip)

        # sqrt
        if self.target_sqrt:
            self.preds_op = tf.sqrt(self.preds_op)
            targets = tf.sqrt(targets)

        loss_op = None
        loss_adhoc = None
        # choose loss
        if self.loss == 'gaussian':
            loss_op = tf.squared_difference(self.preds_op, targets)
            loss_adhoc = tf.squared_difference(self.preds_adhoc, targets)

        elif self.loss == 'poisson':
            loss_op = tf.nn.log_poisson_loss(targets,
                                             tf.log(self.preds_op),
                                             compute_full_loss=True)
            loss_adhoc = tf.nn.log_poisson_loss(targets,
                                                tf.log(self.preds_adhoc),
                                                compute_full_loss=True)

        elif self.loss == 'negative_binomial':
            # define overdispersion alphas
            self.alphas = tf.get_variable(
                'alphas',
                shape=[self.num_targets],
                initializer=tf.constant_initializer(-5),
                dtype=tf.float32)
            self.alphas = tf.nn.softplus(tf.clip_by_value(
                self.alphas, -50, 50))
            tf.summary.histogram('alphas', self.alphas)
            for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'):
                tf.summary.scalar('alpha_t%d' % ti, self.alphas[ti])

            # compute w/ inverse
            k = 1. / self.alphas

            # expand k
            k_expand = tf.tile(k, [self.batch_size * seq_length])
            k_expand = tf.reshape(
                k_expand, (self.batch_size, seq_length, self.num_targets))

            # expand lgamma(k)
            lgk_expand = tf.tile(tf.lgamma(k), [self.batch_size * seq_length])
            lgk_expand = tf.reshape(
                lgk_expand, (self.batch_size, seq_length, self.num_targets))

            # construct loss
            loss1 = targets * tf.log(self.preds_op /
                                     (self.preds_op + k_expand))
            loss2 = k_expand * tf.log(k_expand / (self.preds_op + k_expand))
            loss3 = tf.lgamma(targets + k_expand) - lgk_expand
            loss_op = -(loss1 + loss2 + loss3)

            # adhoc
            loss1 = targets * tf.log(self.preds_adhoc /
                                     (self.preds_adhoc + k_expand))
            loss2 = k_expand * tf.log(k_expand / (self.preds_adhoc + k_expand))
            loss_adhoc = -(loss1 + loss2 + loss3)

        elif self.loss == 'negative_binomial_hilbe':
            # define overdispersion alphas
            self.alphas = tf.get_variable(
                'alphas',
                shape=[self.num_targets],
                initializer=tf.constant_initializer(-5),
                dtype=tf.float32)
            self.alphas = tf.exp(tf.clip_by_value(self.alphas, -50, 50))

            # expand
            alphas_expand = tf.tile(self.alphas,
                                    [self.batch_size * seq_length])
            alphas_expand = tf.reshape(
                alphas_expand, (self.batch_size, seq_length, self.num_targets))

            # construct loss
            loss1 = targets * tf.log(self.preds_op)
            loss2 = (alphas_expand * targets + 1) / alphas_expand
            loss3 = tf.log(alphas_expand * self.preds_op + 1)
            loss_op = -loss1 + loss2 * loss3

            # adhoc
            loss1 = targets * tf.log(self.preds_adhoc)
            loss3 = tf.log(alphas_expand * self.preds_adhoc + 1)
            loss_adhoc = -loss1 + loss2 * loss3

        elif self.loss == 'gamma':
            # jchan document
            loss_op = targets / self.preds_op + tf.log(self.preds_op)
            loss_adhoc = targets / self.preds_adhoc + tf.log(self.preds_adhoc)

        elif self.loss == 'cross_entropy':
            loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_op)
            loss_adhoc = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_adhoc)

        else:
            print('Cannot identify loss function %s' % self.loss)
            exit(1)

        # set NaN's to zero
        # loss_op = tf.boolean_mask(loss_op, tf.logical_not(self.targets_na[:,tstart:tend]))

        # reduce lossses by batch and position
        loss_op = tf.reduce_mean(loss_op, axis=[0, 1], name='target_loss')
        loss_op = tf.check_numerics(loss_op, 'Invalid loss', name='loss_check')

        loss_adhoc = tf.reduce_mean(loss_adhoc,
                                    axis=[0, 1],
                                    name='target_loss_adhoc')
        tf.summary.histogram('target_loss', loss_op)
        for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'):
            tf.summary.scalar('loss_t%d' % ti, loss_op[ti])
        self.target_losses = loss_op
        self.target_losses_adhoc = loss_adhoc

        # define target sigmas
        """
        self.target_sigmas = tf.get_variable('target_sigmas',
        shape=[self.num_targets], initializer=tf.constant_initializer(2),
        dtype=tf.float32)
        self.target_sigmas =
        tf.nn.softplus(tf.clip_by_value(self.target_sigmas,-50,50))
        tf.summary.histogram('target_sigmas', self.target_sigmas)
        for ti in np.linspace(0,self.num_targets-1,10).astype('int'):
            tf.summary.scalar('sigma_t%d'%ti, self.target_sigmas[ti])
        # self.target_sigmas = tf.ones(self.num_targets) / 2.
        """

        # dot losses target sigmas
        # loss_op = loss_op / (2*self.target_sigmas)
        # loss_adhoc = loss_adhoc / (2*self.target_sigmas)

        # fully reduce
        loss_op = tf.reduce_mean(loss_op, name='loss')
        loss_adhoc = tf.reduce_mean(loss_adhoc, name='loss_adhoc')

        # add extraneous terms
        loss_op += self.weights_regularizers  # + tf.reduce_mean(tf.log(self.target_sigmas))
        loss_adhoc += self.weights_regularizers  # + tf.reduce_mean(tf.log(self.target_sigmas))

        # track
        tf.summary.scalar('loss', loss_op)
        self.targets_op = targets
        return loss_op, loss_adhoc
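
For orientation, here is a minimal NumPy/SciPy sketch (illustrative values only, not the model's tensors) of the negative binomial log-likelihood that the 'negative_binomial' branch above assembles; the -gammaln(targets + 1) term is independent of the predictions, which is presumably why the loss omits it.

import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

t, mu, k = 5.0, 3.0, 2.0  # hypothetical target count, predicted mean, dispersion
log_lik = (t * np.log(mu / (mu + k)) + k * np.log(k / (mu + k))
           + gammaln(t + k) - gammaln(k) - gammaln(t + 1.0))
print(log_lik, nbinom.logpmf(t, k, k / (k + mu)))  # the two values should agree
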
Example #51
0
if Layers >= 3:
    theta3_s = tf.stop_gradient(theta3)
    theta3T_s = tf.stop_gradient(theta3T)
    theta_alpha3_s = tf.stop_gradient(theta_alpha3)
    theta_beta3_s = tf.stop_gradient(theta_beta3)
    c4_s = tf.stop_gradient(c4)
    c_alpha4_s = tf.stop_gradient(c_alpha4)
    c_beta4_s = tf.stop_gradient(c_beta4)
    phi3_s = tf.stop_gradient(phi3)
    phi_alpha3_s = tf.stop_gradient(phi_alpha3)
r_s = tf.stop_gradient(r)
r_alpha_s = tf.stop_gradient(r_alpha)
r_beta_s = tf.stop_gradient(r_beta)

# Calculate ELBO
Loglike = tf.reduce_sum(x * tf.log(phi_theta1) - phi_theta1 - tf.lgamma(x + 1.)) / (K[0] * batch_size)
# Layer 1
if Layers == 1:
    pta_phi_theta2 = tf.transpose(r)
    pta_phi_theta2T = tf.transpose(r)
else:
    pta_phi_theta2 = phi_theta2
    pta_phi_theta2T = phi_theta2T
pta_phi_theta2_s = tf.stop_gradient(pta_phi_theta2)
pta_phi_theta2T_s = tf.stop_gradient(pta_phi_theta2T)

ELBO = Loglike + log_gamma_minus(theta1T, pta_phi_theta2T_s, c2_s, theta_alpha1_s, theta_beta1_s) / (K[0] * batch_size)
ELBO = ELBO + log_dirichlet_minus(phi1, eta_l[1], phi_alpha1_s) / (K[0] * rho_mb * batch_size)
ELBO = ELBO + log_gamma_minus(theta1T_s, pta_phi_theta2T_s, c2, theta_alpha1_s, theta_beta1_s) / (K[0] * batch_size) \
       + prior * log_gamma_minus(c2, e0, f0, c_alpha2_s, c_beta2_s) / (K[0] * batch_size)
Example #52
0
    def __init__(self, conf):
        """ Set up remaining layers, objective and loss functions, gradient 
        compute and apply ops, network parameter synchronization ops, and 
        summary ops. """

        super(PolicyVNetwork, self).__init__(conf)
        
        self.beta = conf['args'].entropy_regularisation_strength
        self.use_recurrent = conf['args'].alg_type == 'a3c-lstm'
                
        with tf.name_scope(self.name):

            self.critic_target_ph = tf.placeholder(
                'float32', [None], name='target')
            self.adv_actor_ph = tf.placeholder("float", [None], name='advantage')

            if self.use_recurrent:
                layer_name = 'lstm_layer'
                self.hidden_state_size = 256
                with tf.variable_scope(self.name+'/'+layer_name) as vs:
                    self.lstm_cell = CustomBasicLSTMCell(self.hidden_state_size, forget_bias=1.0)

                    self.step_size = tf.placeholder(tf.float32, [1], name='step_size')
                    self.initial_lstm_state = tf.placeholder(
                        tf.float32, [1, 2*self.hidden_state_size], name='initial_state')
                    
                    ox_reshaped = tf.reshape(self.ox, [1,-1,256])
                    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
                        self.lstm_cell,
                        ox_reshaped,
                        initial_state=self.initial_lstm_state,
                        sequence_length=self.step_size,
                        time_major=False,
                        scope=vs)

                    self.ox = tf.reshape(lstm_outputs, [-1,256])

                    # Get all LSTM trainable params
                    self.lstm_trainable_variables = [v for v in 
                        tf.trainable_variables() if v.name.startswith(vs.name)]


            # Final actor layer
            layer_name = 'softmax_policy4'            
            self.wpi, self.bpi, self.output_layer_pi, self.log_output_layer_pi = layers.softmax_and_log_softmax(
                layer_name, self.ox, self.num_actions)

            # Compute Poisson x Discrete Gaussian
            self.t = tf.placeholder(tf.float32, num_actions)
            self.l = tf.placeholder(tf.float32, num_actions)
            k = np.arange(10, dtype=np.float32)  # use a float dtype so tf.lgamma(k + 1) below is well-defined
            t_hat = tf.log(1+tf.exp(self.t))
            l_hat = tf.log(1+tf.exp(self.l))
            self.action_repeat_probs = tf.exp(-(k-l_hat)**2/(2*t_hat) - l_hat - tf.lgamma(k+1)) * l_hat**k
            self.log_action_repeat_probs = -(k-l_hat)**2/(2*t_hat) - l_hat - tf.lgamma(k+1) + k*tf.log(l_hat)

            self.selected_repeat = tf.placeholder(tf.int32)
            self.selected_repeat_prob = self.action_repeat_probs[self.selected_repeat]
            
            # Entropy: ∑_a[-p_a ln p_a]
            self.output_layer_entropy = tf.reduce_sum(
                - 1.0 * tf.multiply(
                    self.output_layer_pi * self.action_repeat_probs,
                    self.log_output_layer_pi + self.log_action_repeat_probs
                ), axis=1)
            self.entropy = tf.reduce_mean(self.output_layer_entropy)

            
            # Final critic layer
            self.wv, self.bv, self.output_layer_v = layers.fc(
                'fc_value4', self.ox, 1, activation='linear')

            self.params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)


            # Advantage critic
            self.adv_critic = tf.subtract(self.critic_target_ph, tf.reshape(self.output_layer_v, [-1]))
            
            # Actor objective
            # Multiply the output of the network by a one-hot vector, 1 for the
            # executed action. This makes the non-regularised objective
            # term zero for non-selected actions.
            self.log_output_selected_action = tf.reduce_sum(
                (self.log_output_layer_pi + self.selected_repeat_prob) * self.selected_action_ph, 
                axis=1
            )
            actor_objective_advantage_term = tf.multiply(
                self.log_output_selected_action, self.adv_actor_ph
            )
            actor_objective_entropy_term = self.beta * self.output_layer_entropy
            self.actor_objective = -tf.reduce_mean(
                actor_objective_advantage_term
                + actor_objective_entropy_term
            )
            
            # Critic loss
            if self.clip_loss_delta > 0:
                quadratic_part = tf.reduce_mean(tf.pow(
                    tf.minimum(
                        tf.abs(self.adv_critic), self.clip_loss_delta
                    ), 2))
                linear_part = tf.subtract(tf.abs(self.adv_critic), quadratic_part)
                # OBS! For the standard L2 loss we would multiply by 0.5. However, the authors of the paper
                # recommend multiplying the gradients of the V function by 0.5, hence the 0.5 here.
                self.critic_loss = tf.multiply(tf.constant(0.5), tf.nn.l2_loss(quadratic_part) + \
                    self.clip_loss_delta * linear_part)
            else:
                self.critic_loss = 0.5 * tf.reduce_mean(tf.pow(self.adv_critic, 2))          
            
            self.loss = self.actor_objective + self.critic_loss
            
            # Optimizer
            grads = tf.gradients(self.loss, self.params)

            if self.clip_norm_type == 'ignore':
                # Unclipped gradients
                self.get_gradients = grads
            elif self.clip_norm_type == 'global':
                # Clip network grads by network norm
                self.get_gradients = tf.clip_by_global_norm(
                            grads, self.clip_norm)[0]
            elif self.clip_norm_type == 'local':
                # Clip layer grads by layer norm
                self.get_gradients = [tf.clip_by_norm(
                            g, self.clip_norm) for g in grads]

            # Placeholders for shared memory vars
            self.params_ph = []
            for p in self.params:
                self.params_ph.append(tf.placeholder(tf.float32, 
                    shape=p.get_shape(), 
                    name="shared_memory_for_{}".format(
                        (p.name.split("/", 1)[1]).replace(":", "_"))))
            
            # Ops to sync net with shared memory vars
            self.sync_with_shared_memory = []
            for i in range(len(self.params)):
                self.sync_with_shared_memory.append(
                    self.params[i].assign(self.params_ph[i]))
Example #53
0
 def gammaln(self, x):
     return tf.lgamma(x)
Example #54
0
def poisson(x, lam):
    return x * tf.log(lam) - lam - tf.lgamma(x + 1.)
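
A quick numeric cross-check of the expression above (assuming SciPy is available; x and lam below are illustrative values, not part of the original snippet):

import numpy as np
from scipy.special import gammaln
from scipy.stats import poisson

x, lam = 4.0, 2.5
print(x * np.log(lam) - lam - gammaln(x + 1.0),  # the same formula in NumPy terms
      poisson.logpmf(x, lam))                    # should agree to float precision
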
Example #55
0
def main():
    argc = len(sys.argv)
    if (argc != 2):
        print("Usage: [Data.csv]")
        exit()

    ## Read Data from file into a data frame
    DATA = sys.argv[1]
    data_df = pd.read_csv(DATA)
    num_data_columns = len(data_df.columns)
    num_dim = num_data_columns
    hypergeom_p = 1
    hypergeom_q = hypergeom_p - 1
    session_string = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    print(
        "Data appears to have {} columns. Assuming a multivariate probability distribution with {} variables."
        .format(num_data_columns, num_data_columns))
    log_file_string = "MELL_log_{}.txt".format(session_string)
    ## A random string identifying this session's details

    ## Set to floating point data
    data = data_df.astype('float64')
    num_rows = data.shape[0]
    print("There are {} rows".format(num_rows))

    ##########################
    ## Inputs and constants ##
    ##########################
    EPOCHS = 1
    n_batches = 50
    model_weight = 1.0
    normalisation_weight = 100.0
    #R_squared_weight     = 1.0
    #intercept_weight     = 1.0
    #gradient_weight      = 100.0

    # Create a placeholder to dynamically switch between batch sizes
    batch_size = tf.placeholder(tf.int64)
    drop_prob = tf.placeholder(tf.float32)

    ## Training Boolean
    training_bool = tf.placeholder(tf.bool)

    ##################
    ## Initializers ##
    ##################
    weight_initialiser_mean = 0.0
    weight_initialiser_std = 0.000000001

    ## Question: do we need a separate initializer for everything?
    #weight_initer = tf.truncated_normal_initializer(mean=weight_initialiser_mean, stddev=weight_initialiser_std)
    #weight_initer = tf.constant_initializer(np.eye(num_gamma,num_dim),dtype=tf.float32)
    #avec_initer = tf.random_uniform_initializer(minval=0.90,maxval=1.1,dtype=tf.float32)
    #alpha_initer = tf.truncated_normal_initializer(mean=0.0, stddev=0.5)
    #beta_initer = tf.truncated_normal_initializer(mean=0.0, stddev=0.5)
    #q_initer = tf.constant_initializer(np.eye(num_dim),dtype=tf.float32)

    #matrix_M_init = tf.constant_initializer(np.eye(num_dim,num_dim),dtype=tf.float32)
    matrix_V_init = tf.truncated_normal_initializer(
        mean=weight_initialiser_mean, stddev=weight_initialiser_std)
    matrix_W_init = tf.truncated_normal_initializer(
        mean=weight_initialiser_mean, stddev=weight_initialiser_std)
    param_a_init = tf.truncated_normal_initializer(mean=1.0,
                                                   stddev=0.000000001)
    param_b_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.1)
    eta_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.1)

    ########################
    ## Input Placeholders ##
    ########################
    ## The data objects, x input data, S set of input exponents
    x = tf.placeholder(tf.float32, shape=[None, num_dim])
    S = tf.placeholder(tf.float32, shape=[None, num_dim])

    ##########################
    ## Restart placeholders ## For continuing and restarting if errors occur
    ##########################
    #eta_reset_input = tf.placeholder( tf.float32, shape=[num_dim])
    #q_reset_input = tf.placeholder( tf.float32, shape=[num_dim,num_dim])
    #w_reset_input = tf.placeholder( tf.float32, shape=[num_gamma, num_dim])
    #alpha_reset_input = tf.placeholder( tf.float32, shape=[num_gamma])
    #beta_reset_input = tf.placeholder( tf.float32, shape=[num_dim])
    #a_reset_input = tf.placeholder( tf.float32, shape=[num_gamma])

    ############################
    ## Iterators and features ##
    ############################
    ds_buffer_constant = 1000
    dataset = tf.data.Dataset.from_tensor_slices(x).shuffle(
        buffer_size=ds_buffer_constant).batch(batch_size).repeat()
    iter = tf.data.Iterator.from_structure(dataset.output_types,
                                           dataset.output_shapes)
    features = iter.get_next()
    dataset_init_op = iter.make_initializer(dataset)

    print(
        "TO ADD: Scale parameter exponents must equal variable exponents in the series expansion via GRMT!"
    )
    print(
        "TO ADD: Write best models to file as we go (periodically, to avoid losing data) : Checkpointing"
    )
    print("TO ADD: Parameter files : Check it works first!")
    print("TO ADD: Add initialiser constants to log")
    print("Put a condition on the sign of the moment?")
    print("Num gammas must be greater or equal to num dim")
    print(
        "Treat some of the gammas as a subset which are a function of the scale weights"
    )
    print(
        "How does GMRT det A come into this? will there be an additional scaling factor?"
    )
    print(
        "Complex network and ability to minimise the complex parts in real predictions? Then we can include the (-1)^(q.s+beta) kind of terms"
    )
    print(
        "Start with a product of marginals (which may be quicker/easier to train) then start with an expression that represents that product for the full distribution"
    )
    print("#####")
    print(
        "Check with simple function! How close can loss get to zero? Can we model this with a noise distribution?"
    )
    print(
        "Can we find the s which maximises the error for a given function? Can we work on that s only?"
    )
    print(
        "Consider a loss function that is signed for small errors such that they cancel if they are noise but do not if they are large?"
    )
    print("For syymetry use the sum of the above function flipped!?")
    print("Check Autonorm is working correctly")
    print("Check Error expectations are working correctly")
    print("Check range 0-1 in s is acceptable?")

    ## Make a log of parameters
    with open(log_file_string, "w") as f:
        f.write("Session {} created {}\n".format(session_string,
                                                 datetime.datetime.now()))
        f.write("Data of shape {}\n".format(data.shape))
        f.write("Number of dimensions = {}\n".format(num_dim))
        f.write("Hypergeometric p = {}\n".format(hypergeom_p))
        f.write("Hypergeometric q = {}\n".format(hypergeom_q))
        f.write("EPOCHS = {}\n".format(EPOCHS))
        f.write("n_batches = {}\n".format(EPOCHS))
        f.write("normalisation_weight = {}\n".format(normalisation_weight))
        #f.write("intercept_weight = {}\n".format(intercept_weight))
        #f.write("gradient_weight = {}\n".format(gradient_weight))
        #f.write("R_squared_weight = {}\n".format(R_squared_weight))
        f.write("ds_buffer_constant = {}\n".format(ds_buffer_constant))

    #####################
    ## Model Variables ##
    #####################
    eta = tf.get_variable(name="eta",
                          dtype=tf.float32,
                          shape=[num_dim],
                          initializer=eta_init)
    matrix_V = tf.get_variable(name="V",
                               dtype=tf.float32,
                               shape=[hypergeom_p, num_dim],
                               initializer=matrix_V_init)
    matrix_W = tf.get_variable(name="W",
                               dtype=tf.float32,
                               shape=[hypergeom_q, num_dim],
                               initializer=matrix_W_init)
    param_a = tf.get_variable(name="a",
                              dtype=tf.float32,
                              shape=[hypergeom_p],
                              initializer=param_a_init)
    param_b = tf.get_variable(name="b",
                              dtype=tf.float32,
                              shape=[hypergeom_q],
                              initializer=param_b_init)

    eta_input = tf.placeholder(dtype=tf.float32, shape=[num_dim])
    param_a_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_p])
    param_b_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_q])
    matrix_V_input = tf.placeholder(dtype=tf.float32,
                                    shape=[hypergeom_p, num_dim])
    matrix_W_input = tf.placeholder(dtype=tf.float32,
                                    shape=[hypergeom_q, num_dim])

    print("CHECK: use locking status and effect?")
    eta_assign_op = tf.assign(eta, eta_input, use_locking=False)
    param_a_assign_op = tf.assign(param_a, param_a_input, use_locking=False)
    param_b_assign_op = tf.assign(param_b, param_b_input, use_locking=False)
    matrix_V_assign_op = tf.assign(matrix_V, matrix_V_input, use_locking=False)
    matrix_W_assign_op = tf.assign(matrix_W, matrix_W_input, use_locking=False)

    #####################
    ## Model Equations ##
    #####################

    print("Warning rawmoments are being used!!")
    print("Optimisation: lgamma(snorm) = np.zeroes([num_dim])")
    print("Optimisation: tf.reduce_sum(np.zeroes([num_dim])) = 0")
    print(
        "NOTE: param_a elements must be bigger than the row sums of V for positive arguments to loggamma(x)!"
    )
    print(
        "NOTE: param_b elements must be bigger than the row sums of W for positive arguments to loggamma(x)!"
    )
    ## Return the normalised moment prediction for the input S
    logXi_a = tf.reduce_sum(
        tf.lgamma(param_a))  ## Hypergeometric p normalisation coeffs
    logXi_b = tf.reduce_sum(
        tf.lgamma(param_b))  ## Hypergeometric q normalisation coeffs
    ## Things that depend on the exponent sample
    logXi_s = tf.map_fn(lambda s: tf.reduce_sum(tf.lgamma(s)),
                        S,
                        dtype=tf.float32)
    logXi_2s = tf.map_fn(lambda s: tf.reduce_sum(tf.lgamma(s)),
                         2.0 * S,
                         dtype=tf.float32)
    logXi_a_minus_V_s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s, axes=[[1], [0]]))),
                                  S,
                                  dtype=tf.float32)
    logXi_b_minus_W_s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s, axes=[[1], [0]]))),
                                  S,
                                  dtype=tf.float32)
    logXi_a_minus_V_2s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s, axes=[[1], [0]]))),
                                   2.0 * S,
                                   dtype=tf.float32)
    logXi_b_minus_W_2s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s, axes=[[1], [0]]))),
                                   2.0 * S,
                                   dtype=tf.float32)
    ## Calculate the log of the analytic moments (as a vector over samples in S)
    #logM   = logXi_b + logXi_a_minus_V_s + logXi_s - logXi_a - logXi_b_minus_W_s
    #logM2s = logXi_b + logXi_a_minus_V_2s + logXi_2s - logXi_a - logXi_b_minus_W_2s
    s_norm = tf.ones(num_dim)
    #logXi_s_norm           = tf.reduce_sum(tf.lgamma(s_norm))
    logXi_a_minus_V_s_norm = tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s_norm, axes=[[1], [0]])))
    logXi_b_minus_W_s_norm = tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s_norm, axes=[[1], [0]])))
    norm_logM = logXi_b + logXi_a_minus_V_s_norm - logXi_a - logXi_b_minus_W_s_norm
    norm_M = tf.exp(
        norm_logM)  ## Get the raw moment for normalisation (dangerous?)

    logM_diff = logXi_a_minus_V_s + logXi_s - logXi_b_minus_W_s - logXi_a_minus_V_s_norm + logXi_b_minus_W_s_norm
    M = tf.exp(logM_diff)  ## Get the raw moment (dangerous?)

    logM2s_diff = logXi_a_minus_V_2s + logXi_2s - logXi_b_minus_W_2s - logXi_a_minus_V_s_norm + logXi_b_minus_W_s_norm
    M2s = tf.exp(logM2s_diff)  ## Get the raw moment (dangerous?)

    #std = tf.sqrt(M2s - tf.square(M)) ## Calculate the std
    #std_error = std/tf.sqrt(1.0*num_rows)  ## Divide by the number of samples to get the expected margin under this estimator

    M_var = tf.subtract(M2s, tf.square(M))
    #error_term = tf.divide(M2s - tf.square(M), 1.0*num_rows)

    print("Warning, messed with normalisation!!")
    norm_target = tf.constant(1.0, dtype=tf.float32)

    #########################################################################################
    ## Values Derived From Data (The things we are trying to fit the analytic function to) ##
    #########################################################################################
    data_exponents = tf.map_fn(lambda s: tf.subtract(s, tf.constant(1.0)), S)
    data_moments = tf.map_fn(lambda s: tf.pow(features, s), data_exponents)
    E = tf.map_fn(lambda mom: tf.reduce_mean(tf.reduce_prod(mom, axis=1)),
                  data_moments,
                  dtype=tf.float32)  ## Careful of overflow here

    ##reduce to variance
    def reduce_var(x, axis=None, keepdims=False):
        m = tf.reduce_mean(x, axis=axis, keep_dims=True)
        devs_squared = tf.square(x - m)
        return tf.reduce_mean(devs_squared, axis=axis, keep_dims=keepdims)

    #E_var = tf.map_fn( lambda mom : reduce_var(tf.reduce_prod(mom,axis=1)), data_moments, dtype = tf.float32 ) ## Careful of overflow here

    #logE = tf.log(E)  ## Possibly useless

    ###################
    ## Loss function ##
    ###################

    ## Calculate the R^2 coefficient of the set of S datapoints
    #mean_log_E = tf.reduce_mean(logEfunc)
    #mean_log_M = tf.reduce_mean(logMfunc)
    #total_error = tf.reduce_sum(tf.square(tf.subtract(logEfunc,mean_log_E)))
    #unexplained_error = tf.reduce_sum(tf.square(tf.subtract(logEfunc, logMfunc)))
    #R_squared = tf.subtract(tf.constant(1.0,dtype=tf.float32), tf.divide(unexplained_error, total_error))

    #logM_error = tf.reduce_sum(tf.square(tf.subtract(logMfunc,mean_log_M)))
    #derived_gradient  = tf.divide(tf.reduce_sum(tf.multiply(logM_error,total_error)),tf.reduce_sum(tf.multiply(logM_error,logM_error)))
    #derived_intercept = mean_log_E - tf.multiply(derived_gradient, mean_log_M)

    ## Define the losses here ##
    #intercept_loss = intercept_weight     * tf.losses.mean_squared_error(derived_intercept,tf.constant(0.0,dtype=tf.float32))
    #gradient_loss  = gradient_weight      * tf.losses.mean_squared_error(derived_gradient,tf.constant(1.0,dtype=tf.float32))
    #R_squared_loss = R_squared_weight     * tf.losses.mean_squared_error(R_squared,tf.constant(1.0,dtype=tf.float32))

    ## Get the loss associated with points beyond the standard error
    #model_loss = model_weight         * tf.reduce_sum(tf.maximum(0.0, tf.square(M-E) - error_term ))
    #var_weight = tf.constant(1.0)

    model_loss = model_weight * tf.losses.mean_squared_error(M, E)
    norm_loss = normalisation_weight * tf.losses.mean_squared_error(
        norm_M, norm_target)
    #var_loss = tf.multiply(var_weight,tf.losses.mean_squared_error(M_var,E_var))

    #gamma_loss = integer_gamma_weight*tf.reduce_sum(tf.map_fn(lambda i : tf.abs(tf.abs(i)*(tf.abs(i)-1)),a))

    ## These require a definition of which s values are allowed (can this be for s in 1 to 2?)
    #positive_argument_a_V_loss = tf.    tf.max()
    #positive_argument_b_W_loss = ...

    ## Define the total loss
    loss = model_loss + norm_loss
    #+ R_squared_loss + intercept_loss + gradient_loss

    print("Potential issue using one optimizer for different routines")
    optimiser = tf.train.AdamOptimizer()
    #optimiser = tf.train.GradientDescentOptimizer(0.001)

    ## Define training operations for independent sub-losses
    train_op = optimiser.minimize(loss)
    norm_op = optimiser.minimize(norm_loss)
    #model_op     = optimiser.minimize(model_loss)
    #R_squared_op = optimiser.minimize(R_squared_loss)
    #gradient_op  = optimiser.minimize(gradient_loss)
    #intercept_op = optimiser.minimize(intercept_loss)

    num_best_models = 1
    best_models_list = []
    largest_min_loss = 1e90

    def queue_compare(current_list, contender):
        clip_num = num_best_models
        if (len(current_list) < clip_num):
            current_list.append(contender)
        else:
            ## Search for the correct place, insert the contender there, and pop the end of the list until clip_num elements remain
            index = clip_num
            for i in range(len(current_list)):
                if (contender[0] < current_list[i][0]):
                    index = i
                    break
            if (i < clip_num): current_list.insert(index, contender)
            while (len(current_list) > clip_num):
                current_list.pop(-1)
        return sorted(current_list, key=lambda x: x[0])

    ## TF graph write for chart viewing
    writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())

    num_train_exponent_samples = 200

    def get_exponent_samples():
        samps = np.ones([num_dim]) + np.random.uniform(
            0.0, 4.0, size=[num_train_exponent_samples, num_dim])
        drop_prob = 0.2
        for i in range(len(samps)):
            for j in range(len(samps[i])):
                if (np.random.uniform(0, 1) < drop_prob): samps[i][j] = 1.0
        return samps

    #############
    ## Session ##
    #############
    epoch_min_max_thresholds = []
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCHS):

            if (epoch % 5 == 0): s_list = get_exponent_samples()
            sess.run(dataset_init_op,
                     feed_dict={
                         x: data,
                         S: s_list,
                         batch_size: num_rows,
                         drop_prob: 0.5,
                         training_bool: True
                     })

            tot_loss = 0.0
            for _ in range(n_batches):
                #_, loss_value, prlogE, prlogM, prnorm_logM, ml, nl, rl, il, grl = sess.run([train_op, loss, logE, logM, norm_logM, model_loss, norm_loss, R_squared_loss, intercept_loss, gradient_loss], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
                #_, loss_value, prnorm_M, ml, nl = sess.run([train_op, loss, norm_M, model_loss, norm_loss], feed_dict={ S:s_list, batch_size : num_rows, drop_prob : 0.5, training_bool : True})
                loss_value, prnorm_M, ml, nl = sess.run(
                    [loss, norm_M, model_loss, norm_loss],
                    feed_dict={
                        S: s_list,
                        batch_size: num_rows,
                        drop_prob: 0.5,
                        training_bool: True
                    })
                tot_loss += loss_value
            print("Iter: {}, Loss: {:.6f}".format(epoch, tot_loss / n_batches))
            print("norm M = {}".format(prnorm_M))
            print("model, norm = {},{}".format(ml, nl))
            #print(test)
            #maxl = max(ml,nl,rl,il,grl)
            if (not np.isnan(tot_loss)
                    and tot_loss / n_batches < largest_min_loss):
                b_eta, b_W, b_V, b_a, b_b = sess.run(
                    [eta, matrix_W, matrix_V, param_a, param_b])
                best_models_list = queue_compare(
                    best_models_list,
                    [tot_loss / n_batches, [b_eta, b_W, b_V, b_a, b_b]])
                largest_min_loss = max(
                    [element[0] for element in best_models_list])
                smallest_min_loss = min(
                    [element[0] for element in best_models_list])
                print("BEST VALUE ADDED! Threshold = {} to {}".format(
                    smallest_min_loss, largest_min_loss))
                epoch_min_max_thresholds.append(
                    (epoch, smallest_min_loss, largest_min_loss))
            if (np.isnan(tot_loss)):
                print("\n\n RESETTING \n\n")
                sess.run(tf.global_variables_initializer())
                sess.run(eta_assign_op,
                         feed_dict={eta_input: best_models_list[0][1][0]})
                sess.run(matrix_W_assign_op,
                         feed_dict={matrix_W_input: best_models_list[0][1][1]})
                sess.run(matrix_V_assign_op,
                         feed_dict={matrix_V_input: best_models_list[0][1][2]})
                sess.run(param_a_assign_op,
                         feed_dict={param_a_input: best_models_list[0][1][3]})
                sess.run(param_b_assign_op,
                         feed_dict={param_b_input: best_models_list[0][1][4]})
            #elif(maxl==nl):
            #  for _ in range(10) : sess.run([norm_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==rl):
            #  for _ in range(10) : sess.run([R_squared_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==ml):
            #  for _ in range(10) : sess.run([model_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==il):
            #  for _ in range(10) : sess.run([intercept_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==grl):
            #  for _ in range(10) : sess.run([gradient_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})

        print("Run finished Writing Files...")
        with open("Output_Mell_{}.txt".format(session_string), "w") as f:
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                E_obs, M_obs = sess.run([E, M],
                                        feed_dict={
                                            S: s_list,
                                            drop_prob: 0.5,
                                            training_bool: True
                                        })
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                f.write("{},{}\n".format(E_obs[0], M_obs[0]))
        with open("Output_Mell_{}_Best_100_Models.txt".format(session_string),
                  "w") as f:
            for i in best_models_list:
                f.write("{}:{}\n".format(i[0], i[1]))

        ## Load the best model back in
        model = 0
        sess.run(eta_assign_op,
                 feed_dict={eta_input: best_models_list[model][1][0]})
        sess.run(matrix_W_assign_op,
                 feed_dict={matrix_W_input: best_models_list[model][1][1]})
        sess.run(matrix_V_assign_op,
                 feed_dict={matrix_V_input: best_models_list[model][1][2]})
        sess.run(param_a_assign_op,
                 feed_dict={param_a_input: best_models_list[model][1][3]})
        sess.run(param_b_assign_op,
                 feed_dict={param_b_input: best_models_list[model][1][4]})
        with open("Output_Best_Mell_{}.txt".format(session_string), "w") as f:
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                E_obs, M_obs = sess.run([E, M],
                                        feed_dict={
                                            S: s_list,
                                            drop_prob: 0.5,
                                            training_bool: True
                                        })
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                f.write("{},{}\n".format(E_obs[0], M_obs[0]))

        ### Make a huge array of predictions using the best 100 models
        with open("Output_Best_Models_Mell_{}.txt".format(session_string),
                  "w") as f:
            for i in range(num_dim):
                f.write("s{},".format(i))
            f.write("E,")
            for i in range(num_best_models):
                f.write("M{},".format(i))
            f.write("\n")
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                E_obs = sess.run(E,
                                 feed_dict={
                                     S: s_list,
                                     drop_prob: 0.5,
                                     training_bool: True
                                 })
                f.write("{},".format(E_obs[0]))
                for model in range(num_best_models):
                    sess.run(
                        eta_assign_op,
                        feed_dict={eta_input: best_models_list[model][1][0]})
                    sess.run(matrix_W_assign_op,
                             feed_dict={
                                 matrix_W_input: best_models_list[model][1][1]
                             })
                    sess.run(matrix_V_assign_op,
                             feed_dict={
                                 matrix_V_input: best_models_list[model][1][2]
                             })
                    sess.run(param_a_assign_op,
                             feed_dict={
                                 param_a_input: best_models_list[model][1][3]
                             })
                    sess.run(param_b_assign_op,
                             feed_dict={
                                 param_b_input: best_models_list[model][1][4]
                             })
                    M_obs = sess.run(M,
                                     feed_dict={
                                         S: s_list,
                                         drop_prob: 0.5,
                                         training_bool: True
                                     })
                    f.write("{},".format(M_obs[0]))
                f.write("\n")

        ### Print parameters of best model into a file for reading
        with open("Output_params_{}.txt".format(session_string), "w") as f:
            for param in sess.run([eta, matrix_W, matrix_V, param_a, param_b]):
                f.write("{}\n".format(param))
Example #56
0
    def _ais_gets_correct_log_normalizer(self,
                                         init,
                                         independent_chain_ndims,
                                         sess,
                                         feed_dict=None):
        counter = collections.Counter()

        def proposal_log_prob(x):
            counter['proposal_calls'] += 1
            event_dims = tf.range(independent_chain_ndims, tf.rank(x))
            return tf.reduce_sum(tfd.Normal(loc=0., scale=1.).log_prob(x),
                                 axis=event_dims)

        def target_log_prob(x):
            counter['target_calls'] += 1
            event_dims = tf.range(independent_chain_ndims, tf.rank(x))
            return self._log_gamma_log_prob(x, event_dims)

        if feed_dict is None:
            feed_dict = {}

        num_steps = 200

        def make_kernel(tlp_fn):
            return tfp.mcmc.HamiltonianMonteCarlo(target_log_prob_fn=tlp_fn,
                                                  step_size=0.5,
                                                  num_leapfrog_steps=2,
                                                  seed=45)

        _, ais_weights, _ = tfp.mcmc.sample_annealed_importance_chain(
            num_steps=num_steps,
            proposal_log_prob_fn=proposal_log_prob,
            target_log_prob_fn=target_log_prob,
            current_state=init,
            make_kernel_fn=make_kernel,
            parallel_iterations=1)

        # We have three calls because the calculation of `ais_weights` entails
        # another call to the `convex_combined_log_prob_fn`. We could refactor
        # things to avoid this, if needed (eg, b/72994218).
        self.assertAllEqual(dict(target_calls=3, proposal_calls=3), counter)

        event_shape = tf.shape(init)[independent_chain_ndims:]
        event_size = tf.reduce_prod(event_shape)

        log_true_normalizer = (-self._shape_param * tf.log(self._rate_param) +
                               tf.lgamma(self._shape_param))
        log_true_normalizer *= tf.cast(event_size, log_true_normalizer.dtype)

        log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights) -
                                    np.log(num_steps))

        ratio_estimate_true = tf.exp(ais_weights - log_true_normalizer)
        ais_weights_size = tf.size(ais_weights)
        standard_error = tf.sqrt(
            _compute_sample_variance(ratio_estimate_true) /
            tf.cast(ais_weights_size, ratio_estimate_true.dtype))

        [
            ratio_estimate_true_,
            log_true_normalizer_,
            log_estimated_normalizer_,
            standard_error_,
            ais_weights_size_,
            event_size_,
        ] = sess.run([
            ratio_estimate_true,
            log_true_normalizer,
            log_estimated_normalizer,
            standard_error,
            ais_weights_size,
            event_size,
        ], feed_dict)

        tf.logging.vlog(
            1, '        log_true_normalizer: {}\n'
            '   log_estimated_normalizer: {}\n'
            '           ais_weights_size: {}\n'
            '                 event_size: {}\n'.format(
                log_true_normalizer_, log_estimated_normalizer_,
                ais_weights_size_, event_size_))
        self.assertNear(ratio_estimate_true_.mean(), 1., 4. * standard_error_)
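
A small numeric aside (illustrative values, assuming SciPy is available): the "true" log-normalizer used above, -shape * log(rate) + lgamma(shape), is the log of the integral of x^(shape-1) * exp(-rate * x) over x > 0, i.e. the log normalizing constant of the unnormalized Gamma density the test targets.

import numpy as np
from scipy.special import gammaln
from scipy.integrate import quad

shape, rate = 2.0, 3.0
analytic = -shape * np.log(rate) + gammaln(shape)
numeric, _ = quad(lambda v: v ** (shape - 1.0) * np.exp(-rate * v), 0.0, np.inf)
print(analytic, np.log(numeric))  # should agree
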
Example #57
0
File: beta.py Project: nd1511/probability
 def _log_normalization(self):
     return (tf.lgamma(self.concentration1) +
             tf.lgamma(self.concentration0) -
             tf.lgamma(self.total_concentration))
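
Assuming total_concentration = concentration1 + concentration0, the value returned above is log B(concentration1, concentration0); a minimal check against SciPy (illustrative values):

from scipy.special import betaln, gammaln

c1, c0 = 2.0, 3.0
print(gammaln(c1) + gammaln(c0) - gammaln(c1 + c0),  # the expression above in SciPy terms
      betaln(c1, c0))                                # should be identical
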
Example #58
0
 def _multi_lgamma(self, a, p, name="multi_lgamma"):
     """Computes the log multivariate gamma function; log(Gamma_p(a))."""
     with self._name_scope(name, values=[a, p]):
         seq = self._multi_gamma_sequence(a, p)
         return (0.25 * p * (p - 1.) * math.log(math.pi) +
                 tf.reduce_sum(tf.lgamma(seq), axis=[-1]))
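
Assuming _multi_gamma_sequence(a, p) yields a, a - 1/2, ..., a - (p - 1)/2, the expression above is the log multivariate gamma function, which SciPy exposes as multigammaln; a small cross-check with illustrative values:

import numpy as np
from scipy.special import gammaln, multigammaln

a, p = 7.5, 3
seq = a - 0.5 * np.arange(p)   # assumed output of _multi_gamma_sequence
print(0.25 * p * (p - 1.0) * np.log(np.pi) + gammaln(seq).sum(),
      multigammaln(a, p))      # should agree
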
Example #59
0
 def variational_expectations(self, Fmu, Fvar, Y):
     if self.invlink is tf.exp:
         return Y * Fmu - tf.exp(Fmu + Fvar / 2) * self.binsize \
                - tf.lgamma(Y + 1) + Y * tf.log(self.binsize)
     return super(Poisson, self).variational_expectations(Fmu, Fvar, Y)
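
A small sanity sketch (illustrative values, not GPflow code): with Fvar = 0 and binsize = 1, the expression above reduces to the Poisson log-pmf with rate exp(Fmu), matching Example #54.

import numpy as np
from scipy.special import gammaln
from scipy.stats import poisson

Y, Fmu = 4.0, 1.2
val = Y * Fmu - np.exp(Fmu) - gammaln(Y + 1.0)  # Fvar = 0, binsize = 1
print(val, poisson.logpmf(Y, np.exp(Fmu)))       # should agree
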
Example #60
0
def gamma_loss(z_over_a, log_alpha, alpha, log_beta, beta):
    loss = -alpha * log_beta + (
        alpha + z_over_a) * tf.log(1 + beta) - tf.lgamma(
            alpha + z_over_a) + tf.lgamma(alpha) + tf.lgamma(z_over_a + 1)
    return K.mean(loss)
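
A hedged reading of gamma_loss (illustrative values, assuming SciPy is available and that log_beta = log(beta)): it looks like the negative log-likelihood of a Gamma-Poisson (negative binomial) mixture, which scipy.stats.nbinom reproduces with n = alpha and p = beta / (1 + beta).

import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

z, alpha, beta = 3.0, 2.0, 0.5
loss = (-alpha * np.log(beta) + (alpha + z) * np.log(1.0 + beta)
        - gammaln(alpha + z) + gammaln(alpha) + gammaln(z + 1.0))
print(loss, -nbinom.logpmf(z, alpha, beta / (1.0 + beta)))  # should agree
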