def beta_log_prob(self, val):
    conc0 = self.parameters['concentration0']
    conc1 = self.parameters['concentration1']
    result = (conc1 - 1.0) * tf.log(val)
    result += (conc0 - 1.0) * tf.log(1.0 - val)
    result += -tf.lgamma(conc1) - tf.lgamma(conc0) + tf.lgamma(conc1 + conc0)
    return result
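# Quick sanity check of the Beta log-density formula above, written in plain
# NumPy/SciPy rather than TF (an illustrative sketch with arbitrary values;
# `conc1` plays the role of alpha and `conc0` of beta, mirroring the names above).
import numpy as np
from scipy import stats
from scipy.special import gammaln

val = np.array([0.2, 0.5, 0.9])
conc1, conc0 = 2.0, 3.0
log_prob = ((conc1 - 1.0) * np.log(val) + (conc0 - 1.0) * np.log(1.0 - val)
            - gammaln(conc1) - gammaln(conc0) + gammaln(conc1 + conc0))
np.testing.assert_allclose(log_prob, stats.beta.logpdf(val, conc1, conc0), rtol=1e-6)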
def logpmf(self, x, n, p):
    """Log of the probability mass function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor for n > 1, where the inner (right-most) dimension
        represents the multivariate dimension. Each element is the number of
        outcomes in a bucket and not a one-hot.
    n : tf.Tensor
        A tensor of one less dimension than ``x``, representing the number of
        outcomes, equal to the sum of ``x`` along the inner (right-most)
        dimension.
    p : tf.Tensor
        A tensor of one less dimension than ``x``, representing probabilities
        which sum to 1.

    Returns
    -------
    tf.Tensor
        A tensor of one dimension less than the input.
    """
    x = tf.cast(x, dtype=tf.float32)
    n = tf.cast(n, dtype=tf.float32)
    p = tf.cast(p, dtype=tf.float32)
    multivariate_idx = len(get_dims(x)) - 1
    if multivariate_idx == 0:
        return tf.lgamma(n + 1.0) - \
            tf.reduce_sum(tf.lgamma(x + 1.0)) + \
            tf.reduce_sum(x * tf.log(p))
    else:
        return tf.lgamma(n + 1.0) - \
            tf.reduce_sum(tf.lgamma(x + 1.0), multivariate_idx) + \
            tf.reduce_sum(x * tf.log(p), multivariate_idx)
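# NumPy/SciPy counterpart of the multinomial log-pmf above (an illustrative
# sketch with arbitrary counts, assuming SciPy >= 0.19 where
# scipy.stats.multinomial is available).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x = np.array([2., 3., 5.])
n, p = 10.0, np.array([0.2, 0.3, 0.5])
log_pmf = gammaln(n + 1.0) - np.sum(gammaln(x + 1.0)) + np.sum(x * np.log(p))
np.testing.assert_allclose(log_pmf, stats.multinomial.logpmf(x, int(n), p), rtol=1e-6)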
def logpdf(self, x, df, loc=0, scale=1):
    """Log of the probability density function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    df : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`df > 0`.
    loc : tf.Tensor
        A tensor of same shape as ``x``.
    scale : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`scale > 0`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    df = tf.cast(df, dtype=tf.float32)
    loc = tf.cast(loc, dtype=tf.float32)
    scale = tf.cast(scale, dtype=tf.float32)
    z = (x - loc) / scale
    return tf.lgamma(0.5 * (df + 1.0)) - tf.lgamma(0.5 * df) - \
        0.5 * (tf.log(np.pi) + tf.log(df)) - tf.log(scale) - \
        0.5 * (df + 1.0) * tf.log(1.0 + (1.0 / df) * tf.square(z))
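# Cross-check of the Student-t log-density above against SciPy (an
# illustrative sketch; the values below are arbitrary).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x, df, loc, scale = np.array([-1.0, 0.3, 2.5]), 4.0, 0.5, 2.0
z = (x - loc) / scale
log_pdf = (gammaln(0.5 * (df + 1.0)) - gammaln(0.5 * df)
           - 0.5 * (np.log(np.pi) + np.log(df)) - np.log(scale)
           - 0.5 * (df + 1.0) * np.log(1.0 + (1.0 / df) * z ** 2))
np.testing.assert_allclose(log_pdf, stats.t.logpdf(x, df, loc=loc, scale=scale), rtol=1e-6)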
def beta(alpha, beta, y):
    # need to clip y, since log of 0 is nan...
    y = tf.clip_by_value(y, 1e-6, 1 - 1e-6)
    return (alpha - 1.) * tf.log(y) + (beta - 1.) * tf.log(1. - y) \
        + tf.lgamma(alpha + beta) \
        - tf.lgamma(alpha) \
        - tf.lgamma(beta)
def beta(x, alpha, beta):
    # need to clip x, since log of 0 is nan...
    x = tf.clip_by_value(x, 1e-6, 1 - 1e-6)
    return (alpha - 1.) * tf.log(x) + (beta - 1.) * tf.log(1. - x) \
        + tf.lgamma(alpha + beta) \
        - tf.lgamma(alpha) \
        - tf.lgamma(beta)
def multinomial_log_prob(self, val):
    n = self.parameters['total_count']
    probs = self.parameters['probs']
    f_n = tf.cast(n, tf.float32)
    f_val = tf.cast(val, tf.float32)
    result = tf.reduce_sum(tf.log(probs) * f_val, -1)
    result += tf.lgamma(f_n + 1) - tf.reduce_sum(tf.lgamma(f_val + 1), -1)
    return result
def student_t(x, mean, scale, deg_free):
    const = tf.lgamma(tf.cast((deg_free + 1.) * 0.5, tf.float64)) \
        - tf.lgamma(tf.cast(deg_free * 0.5, tf.float64)) \
        - 0.5 * (tf.log(tf.square(scale)) + tf.cast(tf.log(deg_free), tf.float64)
                 + np.log(np.pi))
    const = tf.cast(const, tf.float64)
    return const - 0.5 * (deg_free + 1.) * \
        tf.log(1. + (1. / deg_free) * (tf.square((x - mean) / scale)))
def binomial_log_prob(self, val):
    n = self.parameters['total_count']
    probs = self.parameters['probs']
    f_n = tf.cast(n, tf.float32)
    f_val = tf.cast(val, tf.float32)
    result = f_val * tf.log(probs) + (f_n - f_val) * tf.log(1.0 - probs)
    result += tf.lgamma(f_n + 1) - tf.lgamma(f_val + 1) - \
        tf.lgamma(f_n - f_val + 1)
    return result
def log_nb_positive(x, mu, theta, eps=1e-8):
    """
    log likelihood (scalar) of a minibatch according to a nb model.

    Variables:
    mu: mean of the negative binomial (has to be positive support) (shape: minibatch x genes)
    theta: inverse dispersion parameter (has to be positive support) (shape: minibatch x genes)
    eps: numerical stability constant
    """
    res = tf.lgamma(x + theta) - tf.lgamma(theta) - tf.lgamma(x + 1) \
        + x * tf.log(mu + eps) - x * tf.log(theta + mu + eps) \
        + theta * tf.log(theta + eps) - theta * tf.log(theta + mu + eps)
    return tf.reduce_sum(res, axis=-1)
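# The (mu, theta) parameterization above corresponds to scipy.stats.nbinom
# with n = theta and p = theta / (theta + mu); a small NumPy sketch of that
# equivalence (illustrative values only).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x = np.array([0., 1., 4., 7.])
mu, theta = 3.0, 2.0
res = (gammaln(x + theta) - gammaln(theta) - gammaln(x + 1)
       + x * np.log(mu) - x * np.log(theta + mu)
       + theta * np.log(theta) - theta * np.log(theta + mu))
np.testing.assert_allclose(res, stats.nbinom.logpmf(x, theta, theta / (theta + mu)),
                           rtol=1e-6)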
def chi2_log_prob(self, val):
    df = self.parameters['df']
    eta = 0.5 * df - 1
    result = tf.reduce_sum(eta * tf.log(val), -1)
    # exponential factor of the chi-squared density: exp(-val / 2) contributes
    # -0.5 * val in log space
    result += -0.5 * val
    result -= tf.lgamma(eta + 1) + (eta + 1) * tf.log(2.0)
    return result
def gamma_log_prob(self, val):
    conc = self.parameters['concentration']
    rate = self.parameters['rate']
    result = (conc - 1.0) * tf.log(val)
    result -= rate * val
    result += -tf.lgamma(conc) + conc * tf.log(rate)
    return result
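# SciPy check of the Gamma(concentration, rate) log-density above; SciPy's
# gamma uses a scale parameter, so scale = 1 / rate (an illustrative sketch
# with arbitrary values).
import numpy as np
from scipy import stats
from scipy.special import gammaln

val = np.array([0.5, 1.0, 3.0])
conc, rate = 2.5, 1.5
log_prob = (conc - 1.0) * np.log(val) - rate * val - gammaln(conc) + conc * np.log(rate)
np.testing.assert_allclose(log_prob, stats.gamma.logpdf(val, conc, scale=1.0 / rate),
                           rtol=1e-6)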
def inverse_gamma_log_prob(self, val):
    conc = self.parameters['concentration']
    rate = self.parameters['rate']
    result = -(conc + 1) * tf.log(val)
    result -= rate * tf.reciprocal(val)
    result += -tf.lgamma(conc) + conc * tf.log(rate)
    return result
def testPoissonLogPmfContinuousRelaxation(self):
    batch_size = 12
    lam = tf.constant([3.0] * batch_size)
    x = np.array([-3., -0.5, 0., 2., 2.2, 3., 3.1, 4., 5., 5.5, 6., 7.]).astype(
        np.float32)
    poisson = self._make_poisson(rate=lam, interpolate_nondiscrete=True)

    expected_continuous_log_pmf = (x * poisson.log_rate - tf.lgamma(1. + x)
                                   - poisson.rate)
    neg_inf = tf.fill(
        tf.shape(expected_continuous_log_pmf),
        value=np.array(-np.inf,
                       dtype=expected_continuous_log_pmf.dtype.as_numpy_dtype))
    expected_continuous_log_pmf = tf.where(x >= 0.,
                                           expected_continuous_log_pmf,
                                           neg_inf)
    expected_continuous_pmf = tf.exp(expected_continuous_log_pmf)

    log_pmf = poisson.log_prob(x)
    self.assertEqual(log_pmf.get_shape(), (batch_size,))
    self.assertAllClose(self.evaluate(log_pmf),
                        self.evaluate(expected_continuous_log_pmf))

    pmf = poisson.prob(x)
    self.assertEqual(pmf.get_shape(), (batch_size,))
    self.assertAllClose(self.evaluate(pmf),
                        self.evaluate(expected_continuous_pmf))
def Poisson(lambda_, name=None):
    k = tf.placeholder(config.int_dtype, name=name)
    # Poisson log-pmf: k * log(lambda) - lambda - log(k!).
    # tf.lgamma only supports floats, so cast k before using it.
    Distribution.logp = (tf.cast(k, config.dtype) * tf.log(lambda_)
                         - lambda_
                         - tf.lgamma(tf.cast(k + 1, config.dtype)))
    # TODO Distribution.integral = ...
    return k
def actual_hypersphere_volume(dims, radius):
    # https://en.wikipedia.org/wiki/Volume_of_an_n-ball
    # Using tf.lgamma because we'd have to otherwise use SciPy which is not
    # a required dependency of core.
    radius = np.asarray(radius)
    dims = tf.cast(dims, dtype=radius.dtype)
    return tf.exp((dims / 2.) * np.log(np.pi) - tf.lgamma(1. + dims / 2.) +
                  dims * tf.log(radius))
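# The same n-ball volume formula in plain NumPy, checked against the familiar
# closed forms for 2 and 3 dimensions (an illustrative sketch).
import numpy as np
from scipy.special import gammaln

def ball_volume(dims, radius):
    return np.exp((dims / 2.) * np.log(np.pi) - gammaln(1. + dims / 2.)
                  + dims * np.log(radius))

r = 1.7
np.testing.assert_allclose(ball_volume(2., r), np.pi * r ** 2, rtol=1e-6)
np.testing.assert_allclose(ball_volume(3., r), 4. / 3. * np.pi * r ** 3, rtol=1e-6)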
def _log_unnormalized_prob(self, x):
    # The log-probability at negative points is always -inf.
    # Catch such x's and set the output value accordingly.
    safe_x = tf.maximum(x if self.interpolate_nondiscrete else tf.floor(x), 0.)
    y = safe_x * self.log_rate - tf.lgamma(1. + safe_x)
    is_supported = tf.broadcast_to(tf.equal(x, safe_x), tf.shape(y))
    neg_inf = tf.fill(tf.shape(y),
                      value=np.array(-np.inf, dtype=y.dtype.as_numpy_dtype))
    return tf.where(is_supported, y, neg_inf)
def variational_expectations(self, Fmu, Fvar, Y):
    if self.invlink is tf.exp:
        return (-self.shape * Fmu
                - tf.lgamma(self.shape)
                + (self.shape - 1.0) * tf.log(Y)
                - Y * tf.exp(-Fmu + Fvar / 2.0))
    else:
        return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
def _log_normalization(self, name='log_normalization'):
    """Returns the log normalization of an LKJ distribution.

    Args:
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      log_z: A Tensor of the same shape and dtype as `concentration`,
        containing the corresponding log normalizers.
    """
    # The formula is from D. Lewandowski et al [1], p. 1999, from the
    # proof that eqs 16 and 17 are equivalent.
    with tf.name_scope('log_normalization_lkj', name, [self.concentration]):
        logpi = np.log(np.pi)
        ans = tf.zeros_like(self.concentration)
        for k in range(1, self.dimension):
            ans += logpi * (k / 2.)
            ans += tf.lgamma(self.concentration +
                             (self.dimension - 1 - k) / 2.)
            ans -= tf.lgamma(self.concentration + (self.dimension - 1) / 2.)
        return ans
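# The same LKJ normalizer written in NumPy for a scalar concentration
# (a sketch that simply mirrors the loop above using scipy.special.gammaln).
import numpy as np
from scipy.special import gammaln

def lkj_log_normalization(concentration, dimension):
    ans = 0.0
    for k in range(1, dimension):
        ans += np.log(np.pi) * (k / 2.)
        ans += gammaln(concentration + (dimension - 1 - k) / 2.)
        ans -= gammaln(concentration + (dimension - 1) / 2.)
    return ans

print(lkj_log_normalization(2.0, 3))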
def GumbelSoftmaxLogDensity(y, p, tau):
    # EPS = tf.constant(1e-10)
    k = tf.shape(y)[-1]
    k = tf.cast(k, tf.float32)
    # y = y + EPS
    # y = tf.divide(y, tf.reduce_sum(y, -1, keep_dims=True))
    y = normalize_to_unit_sum(y)
    sum_p_over_y = tf.reduce_sum(tf.divide(p, tf.pow(y, tau)), -1)
    logp = tf.lgamma(k)
    logp = logp + (k - 1) * tf.log(tau)
    logp = logp - k * tf.log(sum_p_over_y)
    logp = logp + sum_p_over_y
    return logp
def __init__(self, train_set, test_set, dictparam):
    super(Elosplit, self).__init__(train_set, test_set, dictparam)
    k = tf.constant(map(lambda x: float(x), range(10)))
    last_vect = tf.expand_dims(ToolBox.last_vector(10), 0)
    win_vector = ToolBox.win_vector(10)

    # Define parameters
    self.elo_atk = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))
    self.elo_def = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))

    # Define the model
    for key, proxy in [('train', self.train_data), ('test', self.test_data)]:
        elo_atk_h = ToolBox.get_elomatch(proxy['team_h'], proxy['time'], self.elo_atk)
        elo_def_h = ToolBox.get_elomatch(proxy['team_h'], proxy['time'], self.elo_def)
        elo_atk_a = ToolBox.get_elomatch(proxy['team_a'], proxy['time'], self.elo_atk)
        elo_def_a = ToolBox.get_elomatch(proxy['team_a'], proxy['time'], self.elo_def)
        lambda_h = tf.expand_dims(tf.exp(self.param['goals_bias'] + elo_atk_h - elo_def_a), 1)
        lambda_a = tf.expand_dims(tf.exp(self.param['goals_bias'] + elo_atk_a - elo_def_h), 1)
        score_h = tf.exp(-lambda_h + tf.log(lambda_h) * k - tf.lgamma(k + 1))
        score_a = tf.exp(-lambda_a + tf.log(lambda_a) * k - tf.lgamma(k + 1))
        score_h += tf.matmul(tf.expand_dims((1. - tf.reduce_sum(score_h, reduction_indices=[1])), 1), last_vect)
        score_a += tf.matmul(tf.expand_dims((1. - tf.reduce_sum(score_a, reduction_indices=[1])), 1), last_vect)
        self.score[key] = tf.batch_matmul(tf.expand_dims(score_h, 2), tf.expand_dims(score_a, 1))
        self.res[key] = tf.reduce_sum(self.score[key] * win_vector, reduction_indices=[1, 2])

    # Define the costs
    self.init_cost()
    for key in ['train', 'test']:
        for proxy in [self.elo_atk, self.elo_def]:
            cost = ToolBox.get_raw_elo_cost(self.param['metaparam0'], self.param['metaparam1'], proxy, self.nb_times)
            self.regulizer[key].append(cost)
            cost = ToolBox.get_timediff_elo_cost(self.param['metaparam2'], proxy, self.nb_times)
            self.regulizer[key].append(cost)

    # Finish the initialization
    super(Elosplit, self).finish_init()
def log_zinb_positive(x, mu, theta, pi, eps=1e-8):
    """
    log likelihood (scalar) of a minibatch according to a zinb model.

    Notes:
    We parametrize the Bernoulli using the logits, hence the softplus
    functions appearing

    Variables:
    mu: mean of the negative binomial (has to be positive support) (shape: minibatch x genes)
    theta: inverse dispersion parameter (has to be positive support) (shape: minibatch x genes)
    pi: logit of the dropout parameter (real support) (shape: minibatch x genes)
    eps: numerical stability constant
    """
    case_zero = tf.nn.softplus(-pi + theta * tf.log(theta + eps)
                               - theta * tf.log(theta + mu + eps)) \
        - tf.nn.softplus(-pi)

    case_non_zero = -pi - tf.nn.softplus(-pi) \
        + theta * tf.log(theta + eps) - theta * tf.log(theta + mu + eps) \
        + x * tf.log(mu + eps) - x * tf.log(theta + mu + eps) \
        + tf.lgamma(x + theta) - tf.lgamma(theta) - tf.lgamma(x + 1)

    mask = tf.cast(tf.less(x, eps), tf.float32)
    res = tf.multiply(mask, case_zero) + tf.multiply(1 - mask, case_non_zero)
    return tf.reduce_sum(res, axis=-1)
def logp(self, bin_counts):
    """Compute the log probability for the counts in the bin, under the model.

    Args:
      bin_counts: array-like integer counts

    Returns:
      The log-probability under the Poisson models for each element of
      bin_counts.
    """
    k = tf.to_float(bin_counts)
    # log poisson(k, r) = log(r^k * e^(-r) / k!) = k log(r) - r - log k!
    # log poisson(k, r=exp(x)) = k * x - exp(x) - lgamma(k + 1)
    return k * self.logr - tf.exp(self.logr) - tf.lgamma(k + 1)
def logpmf(self, x, n, p):
    """Log of the probability mass function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    n : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`n > 0`.
    p : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`p\in(0,1)`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    n = tf.cast(n, dtype=tf.float32)
    p = tf.cast(p, dtype=tf.float32)
    return tf.lgamma(x + n) - tf.lgamma(x + 1.0) - tf.lgamma(n) + \
        n * tf.log(p) + x * tf.log(1.0 - p)
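# SciPy cross-check of the negative-binomial log-pmf above; here ``p`` is the
# success probability attached to ``n``, matching scipy.stats.nbinom(n, p)
# evaluated at the failure count ``x`` (an illustrative sketch).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x, n, p = np.array([0., 2., 5.]), 3.0, 0.4
log_pmf = (gammaln(x + n) - gammaln(x + 1.0) - gammaln(n)
           + n * np.log(p) + x * np.log(1.0 - p))
np.testing.assert_allclose(log_pmf, stats.nbinom.logpmf(x, n, p), rtol=1e-6)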
def Poisson(lambda_, name=None):
    k = tf.placeholder(config.int_dtype, name=name)
    # FIXME tf.lgamma only supports floats so cast before
    Distribution.logp = (
        tf.cast(k, config.dtype) * tf.log(lambda_)
        - lambda_
        - tf.lgamma(tf.cast(k + 1, config.dtype)))
    # TODO Distribution.integral = ...
    def integral(l, u):
        return tf.constant(1, dtype=config.dtype)
    Distribution.integral = integral
    return k
def logpdf(self, x, df):
    """Log of the probability density function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    df : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`df > 0`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    df = tf.cast(df, dtype=tf.float32)
    return (0.5 * df - 1) * tf.log(x) - 0.5 * x - \
        0.5 * df * tf.log(2.0) - tf.lgamma(0.5 * df)
def logpmf(self, x, mu):
    """Log of the probability mass function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    mu : tf.Tensor
        A tensor of same shape as ``x``, and with all elements constrained to
        :math:`mu > 0`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    mu = tf.cast(mu, dtype=tf.float32)
    return x * tf.log(mu) - mu - tf.lgamma(x + 1.0)
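# SciPy cross-check of the Poisson log-pmf above (an illustrative sketch with
# arbitrary counts and rate).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x, mu = np.array([0., 1., 4., 9.]), 3.5
log_pmf = x * np.log(mu) - mu - gammaln(x + 1.0)
np.testing.assert_allclose(log_pmf, stats.poisson.logpmf(x, mu), rtol=1e-6)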
def _log_normalization(self):
    """Computes the log-normalizer of the distribution."""
    event_dim = self.event_shape[0].value
    if event_dim is None:
        raise ValueError('vMF _log_normalizer currently only supports '
                         'statically known event shape')
    safe_conc = tf.where(self.concentration > 0,
                         self.concentration,
                         tf.ones_like(self.concentration))
    safe_lognorm = ((event_dim / 2 - 1) * tf.log(safe_conc) -
                    (event_dim / 2) * np.log(2 * np.pi) -
                    tf.log(_bessel_ive(event_dim / 2 - 1, safe_conc)) -
                    tf.abs(safe_conc))
    log_nsphere_surface_area = (
        np.log(2.) + (event_dim / 2) * np.log(np.pi) -
        tf.lgamma(tf.cast(event_dim / 2, self.dtype)))
    return tf.where(self.concentration > 0, -safe_lognorm,
                    log_nsphere_surface_area * tf.ones_like(safe_lognorm))
def entropy(self, a, scale=1):
    """Entropy of probability distribution.

    Parameters
    ----------
    a : tf.Tensor
        **Shape** parameter. An n-D tensor with all elements constrained to
        :math:`a > 0`.
    scale : tf.Tensor
        **Scale** parameter. An n-D tensor with all elements constrained to
        :math:`scale > 0`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    a = tf.cast(a, dtype=tf.float32)
    scale = tf.cast(scale, dtype=tf.float32)
    return a + tf.log(scale * tf.exp(tf.lgamma(a))) - \
        (1.0 + a) * tf.digamma(a)
def logpdf(self, x, a, scale=1):
    """Log of the probability density function.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    a : tf.Tensor
        **Shape** parameter. A tensor of same shape as ``x``, and with all
        elements constrained to :math:`a > 0`.
    scale : tf.Tensor
        **Scale** parameter. A tensor of same shape as ``x``, and with all
        elements constrained to :math:`scale > 0`.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.
    """
    x = tf.cast(x, dtype=tf.float32)
    a = tf.cast(a, dtype=tf.float32)
    scale = tf.cast(scale, dtype=tf.float32)
    return (a - 1.0) * tf.log(x) - x / scale - a * tf.log(scale) - tf.lgamma(a)
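# SciPy check of the shape/scale Gamma log-density above; scipy.stats.gamma
# uses the same (a, scale) convention (an illustrative sketch).
import numpy as np
from scipy import stats
from scipy.special import gammaln

x, a, scale = np.array([0.5, 1.0, 3.0]), 2.5, 2.0
log_pdf = (a - 1.0) * np.log(x) - x / scale - a * np.log(scale) - gammaln(a)
np.testing.assert_allclose(log_pdf, stats.gamma.logpdf(x, a, scale=scale), rtol=1e-6)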
def _log_prob(self, x):
    x = self._assert_valid_sample(x)
    # broadcast logits or x if need be.
    logits = self.logits
    if (not x.shape.is_fully_defined() or
            not logits.shape.is_fully_defined() or
            x.shape != logits.shape):
        logits = tf.ones_like(x, dtype=logits.dtype) * logits
        x = tf.ones_like(logits, dtype=x.dtype) * x
    logits_shape = tf.shape(tf.reduce_sum(logits, axis=[-1]))
    logits_2d = tf.reshape(logits, [-1, self.event_size])
    x_2d = tf.reshape(x, [-1, self.event_size])
    # compute the normalization constant
    k = tf.cast(self.event_size, x.dtype)
    log_norm_const = (tf.lgamma(k) + (k - 1.) * tf.log(self.temperature))
    # compute the unnormalized density
    log_softmax = tf.nn.log_softmax(logits_2d - x_2d * self._temperature_2d)
    log_unnorm_prob = tf.reduce_sum(log_softmax, [-1], keepdims=False)
    # combine unnormalized density with normalization constant
    log_prob = log_norm_const + log_unnorm_prob
    # Reshapes log_prob to be consistent with shape of user-supplied logits
    ret = tf.reshape(log_prob, logits_shape)
    return ret
def gamma(x, shape, scale):
    return -shape * tf.log(scale) - tf.lgamma(shape) \
        + (shape - 1.) * tf.log(x) - x / scale
def _log_normalization(self):
    return (tf.log(tf.abs(self.scale)) +
            0.5 * tf.log(self.df) +
            0.5 * np.log(np.pi) +
            tf.lgamma(0.5 * self.df) -
            tf.lgamma(0.5 * (self.df + 1.)))
def _entropy(self):
    return (self.concentration +
            tf.log(self.rate) +
            tf.lgamma(self.concentration) -
            ((1. + self.concentration) * tf.digamma(self.concentration)))
def log_q(self, z, alpha, beta):
    return (alpha - 1) * tf.log(z) - beta * z + alpha * tf.log(beta) - \
        tf.lgamma(alpha)
def GO_Gamma_v2(x, alpha): # x sim Gamma(alpha, 1) x = tf.cast(x, tf.float64) alpha = tf.cast(alpha, tf.float64) logx = tf.log(x) ex_gamma_xa = tf.exp(x + tf.lgamma(alpha) + (1. - alpha) * logx) psi_m_log = tf.digamma(alpha + 1.) - logx igamma_up_reg = tf.igammac(alpha, x) # Part 1 indx1 = tf.where(x <= 1e-2) x_indx1 = tf.gather_nd(x, indx1) alpha_indx1 = tf.gather_nd(alpha, indx1) GO_Gamma_alpha_value1 = tf.exp(x_indx1) * x_indx1 / alpha_indx1 * ( tf.gather_nd(psi_m_log, indx1) + x_indx1 / tf.pow(alpha_indx1 + 1., 2) - tf.pow(x_indx1 / (alpha_indx1 + 2.), 2) + 0.5 * tf.pow(x_indx1, 3) / tf.pow(alpha_indx1 + 3., 2) ) # Part 2 N_alpha = tf.round(tf.exp( - 0.488484605941243044124888683654717169702053070068359375 * tf.log(alpha) + 1.6948389987594634220613443176262080669403076171875 )) indx2 = tf.where(tf.logical_and( tf.logical_and(x > 1e-2, alpha <= 3.), (x <= (alpha + N_alpha * tf.sqrt(alpha))) )) KK = 15 kk = tf.cast(tf.range(1, KK + 1), tf.float64) x_indx2 = tf.gather_nd(x, indx2) alpha_indx2 = tf.gather_nd(alpha, indx2) GO_Gamma_alpha_value2 = tf.gather_nd(ex_gamma_xa, indx2) * ( -tf.gather_nd(psi_m_log, indx2) * tf.gather_nd(igamma_up_reg, indx2) + ( tf.digamma(alpha_indx2 + KK + 1.) - tf.gather_nd(logx, indx2) - tf.reduce_sum( tf.igammac(tf.expand_dims(alpha_indx2, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_indx2, 1)) / (tf.expand_dims(alpha_indx2, 1) + tf.expand_dims(kk, 0)) , 1) ) ) # Part 2_1 indx2_1 = tf.where(tf.logical_and( tf.logical_and(x > 1e-2, alpha <= 3.), (x > (alpha + N_alpha * tf.sqrt(alpha))) )) KK = 15 kk = tf.cast(tf.range(1, KK + 1), tf.float64) x_indx2_1 = tf.gather_nd(x, indx2_1) alpha_indx2_1 = tf.gather_nd(alpha, indx2_1) GO_Gamma_alpha_value2_1 = tf.gather_nd(ex_gamma_xa, indx2_1) * ( -tf.gather_nd(psi_m_log, indx2_1) * tf.gather_nd(igamma_up_reg, indx2_1) + ( tf.digamma(alpha_indx2_1 + KK + 1.) - tf.gather_nd(logx, indx2_1) - tf.reduce_sum( tf.igammac(tf.expand_dims(alpha_indx2_1, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_indx2_1, 1)) / (tf.expand_dims(alpha_indx2_1, 1) + tf.expand_dims(kk, 0)) , 1) ) ) GO_Gamma_alpha_value2_1 = tf.maximum( GO_Gamma_alpha_value2_1, 1. / alpha_indx2_1 - tf.gather_nd(ex_gamma_xa, indx2_1) * tf.gather_nd(psi_m_log, indx2_1) * tf.gather_nd(igamma_up_reg, indx2_1) ) # Part 3 indx3 = tf.where( tf.logical_and( tf.logical_and(x > 1e-2, alpha > 3.), alpha <= 500. ) ) KK = 10 kk = tf.cast(tf.range(1, KK + 1), tf.float64) x_indx3 = tf.gather_nd(x, indx3) alpha_indx3 = tf.gather_nd(alpha, indx3) x_l = alpha_indx3 - tf.log(alpha_indx3) * tf.sqrt(alpha_indx3) logx_l = tf.log(x_l) ex_gamma_xa_l = tf.exp(x_l + tf.lgamma(alpha_indx3) + (1. - alpha_indx3) * logx_l) psi_m_log_l = tf.digamma(alpha_indx3 + 1.) - logx_l igamma_low_reg_l = tf.igamma(alpha_indx3, x_l) # igamma_up_reg_l = tf.igammac(alpha_indx3, x_l) # f_l = ex_gamma_xa_l * ( # -psi_m_log_l * igamma_up_reg_l + # (tf.digamma(alpha_indx3 + KK + 1.) - logx_l - # tf.reduce_sum( # tf.igammac(tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_l, 1)) / # (tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0)) # , 1)) # ) f_l = ex_gamma_xa_l * ( psi_m_log_l * igamma_low_reg_l + tf.reduce_sum( tf.igamma(tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0), tf.expand_dims(x_l, 1)) / (tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0)) , 1) ) g_l = (1. + (1. 
- alpha_indx3) / x_l) * f_l + ( -ex_gamma_xa_l / x_l * igamma_low_reg_l + (psi_m_log_l + tf.reduce_sum( tf.exp( tf.expand_dims(kk, 0) * tf.log(tf.expand_dims(x_l, 1)) + tf.lgamma(tf.expand_dims(alpha_indx3, 1)) - tf.lgamma( tf.expand_dims(alpha_indx3, 1) + tf.expand_dims(kk, 0) + 1.) ) , 1)) ) x_m = alpha_indx3 f_m = 1. + 0.167303227226226980395296095593948848545551300048828125 / \ ( tf.pow(x_m, 1.0008649793164192676186985409003682434558868408203125) - 0.07516433982238841793321881823430885560810565948486328125 ) x_r = 2. * alpha_indx3 - x_l f_r = 1. / alpha_indx3 - tf.exp(x_r + tf.lgamma(alpha_indx3) + (1. - alpha_indx3) * tf.log(x_r)) * ( (tf.digamma(alpha_indx3 + 1.) - tf.log(x_r)) * tf.igammac(alpha_indx3, x_r) ) lambda_r = tf.exp( 959.627335718427275423891842365264892578125 / ( tf.pow(alpha_indx3, 1.324768828487964622553363369661383330821990966796875) + 142.427456986662718918523751199245452880859375 ) - 13.01439996187340142341781756840646266937255859375 ) x_mat_i = tf.concat([tf.expand_dims(x_l, 1), tf.expand_dims(x_m, 1), tf.expand_dims(x_r, 1)], 1) x_mat_bar_i = x_mat_i - tf.expand_dims(alpha_indx3, 1) x_mat_hat_i = tf.sqrt(x_mat_i) - tf.sqrt(tf.expand_dims(alpha_indx3, 1)) f_mat_i = tf.concat([tf.expand_dims(f_l, 1), tf.expand_dims(f_m, 1), tf.expand_dims(f_r, 1)], 1) lambda_mat_i = tf.concat([tf.expand_dims(tf.ones_like(alpha_indx3), 1), tf.expand_dims(tf.ones_like(alpha_indx3), 1), tf.expand_dims(lambda_r, 1) ], 1) x_mat_j = tf.expand_dims(x_l, 1) g_mat_j = tf.expand_dims(g_l, 1) lambda_mat_j = tf.expand_dims(tf.ones_like(alpha_indx3), 1) A = tf.reduce_sum(lambda_mat_i * tf.pow(x_mat_bar_i, 2), 1) + tf.reduce_sum(lambda_mat_j, 1) B = tf.reduce_sum(lambda_mat_i * x_mat_bar_i * x_mat_hat_i, 1) + \ tf.reduce_sum(lambda_mat_j / 2. / tf.sqrt(x_mat_j), 1) C = tf.reduce_sum(lambda_mat_i * x_mat_bar_i, 1) D = tf.reduce_sum(lambda_mat_i * tf.pow(x_mat_hat_i, 2), 1) + tf.reduce_sum(lambda_mat_j / 4. / x_mat_j, 1) E = tf.reduce_sum(lambda_mat_i * x_mat_hat_i, 1) F = tf.reduce_sum(lambda_mat_i, 1) G = tf.reduce_sum(lambda_mat_i * x_mat_bar_i * f_mat_i, 1) + tf.reduce_sum(lambda_mat_j * g_mat_j, 1) H = tf.reduce_sum(lambda_mat_i * x_mat_hat_i * f_mat_i, 1) + \ tf.reduce_sum(lambda_mat_j / 2. / tf.sqrt(x_mat_j) * g_mat_j, 1) I = tf.reduce_sum(lambda_mat_i * f_mat_i, 1) Z = F * tf.pow(B, 2) - 2. * B * C * E + D * tf.pow(C, 2) + A * tf.pow(E, 2) - A * D * F a_cor = 1. / Z * (G * (tf.pow(E, 2) - D * F) + H * (B * F - C * E) - I * (B * E - C * D)) b_cor = 1. / Z * (G * (B * F - C * E) + H * (tf.pow(C, 2) - A * F) - I * (B * C - A * E)) c_cor = 1. / Z * (-G * (B * E - C * D) + I * (tf.pow(B, 2) - A * D) - H * (B * C - A * E)) GO_Gamma_alpha_value3 = a_cor * (x_indx3 - alpha_indx3) + b_cor * (tf.sqrt(x_indx3) - tf.sqrt(alpha_indx3)) + c_cor GO_Gamma_alpha_value3 = tf.maximum( GO_Gamma_alpha_value3, 1. / alpha_indx3 - tf.gather_nd(ex_gamma_xa, indx3) * tf.gather_nd(psi_m_log, indx3) * tf.gather_nd(igamma_up_reg, indx3) ) # Part 4 # indx4 = tf.where( # tf.logical_and( # tf.logical_and(x > 1e-2, alpha > 500.), # (x <= (alpha + 2. * tf.log(alpha) * tf.sqrt(alpha))) # ) # ) indx4 = tf.where( tf.logical_and(x > 1e-2, alpha > 500.) ) x_indx4 = tf.gather_nd(x, indx4) alpha_indx4 = tf.gather_nd(alpha, indx4) f_m_large = 1. 
+ 0.167303227226226980395296095593948848545551300048828125 / \ ( tf.pow(alpha_indx4, 1.0008649793164192676186985409003682434558868408203125) - 0.07516433982238841793321881823430885560810565948486328125 ) g_m_large = 0.54116502161502622048061539317131973803043365478515625 * \ tf.pow(alpha_indx4, -1.010274491769996618728555404231883585453033447265625) GO_Gamma_alpha_value4 = f_m_large + g_m_large * (x_indx4 - alpha_indx4) # Part 4_1 # indx4_1 = tf.where( # tf.logical_and( # tf.logical_and(x > 1e-2, alpha > 500.), # (x > (alpha + 2. * tf.log(alpha) * tf.sqrt(alpha))) # ) # ) # alpha_indx4_1 = tf.gather_nd(alpha, indx4_1) # GO_Gamma_alpha_value4_1 = 1. / alpha_indx4_1 - tf.gather_nd(ex_gamma_xa, indx4_1) * \ # tf.gather_nd(psi_m_log, indx4_1) * tf.gather_nd(igamma_up_reg, indx4_1) # Summerize GO_Gamma_alpha = tf.sparse_to_dense(indx1, x.shape, GO_Gamma_alpha_value1) + \ tf.sparse_to_dense(indx2, x.shape, GO_Gamma_alpha_value2) + \ tf.sparse_to_dense(indx2_1, x.shape, GO_Gamma_alpha_value2_1) + \ tf.sparse_to_dense(indx3, x.shape, GO_Gamma_alpha_value3) + \ tf.sparse_to_dense(indx4, x.shape, GO_Gamma_alpha_value4) # + \ # tf.sparse_to_dense(indx4_1, x.shape, GO_Gamma_alpha_value4_1) GO_Gamma_alpha = tf.cast(GO_Gamma_alpha, tf.float32) return GO_Gamma_alpha # , x_l, x_r, f_l, f_m, f_r, g_l
def _verifySampleAndPdfConsistency(self, vmf, rtol=0.075): """Verifies samples are consistent with the PDF using importance sampling. In particular, we verify an estimate the surface area of the n-dimensional hypersphere, and the surface areas of the spherical caps demarcated by a handful of survival rates. Args: vmf: A `VonMisesFisher` distribution instance. rtol: Relative difference tolerable. """ dim = tf.dimension_value(vmf.event_shape[-1]) nsamples = 50000 samples = vmf.sample(sample_shape=[nsamples]) samples = tf.check_numerics(samples, 'samples') log_prob = vmf.log_prob(samples) log_prob = tf.check_numerics(log_prob, 'log_prob') log_importance = -log_prob sphere_surface_area_estimate, samples, importance, conc = self.evaluate( [ tf.exp( tf.reduce_logsumexp(log_importance, axis=0) - tf.log(tf.to_float(nsamples))), samples, tf.exp(log_importance), vmf.concentration ]) true_sphere_surface_area = 2 * (np.pi)**(dim / 2) * self.evaluate( tf.exp(-tf.lgamma(dim / 2))) # Broadcast to correct size true_sphere_surface_area += np.zeros_like(sphere_surface_area_estimate) # Highly concentrated distributions do not get enough coverage to provide # a reasonable full-sphere surface area estimate. These are covered below # by CDF-based hypersphere cap surface area estimates. self.assertAllClose(true_sphere_surface_area[np.where(conc < 3)], sphere_surface_area_estimate[np.where(conc < 3)], rtol=rtol) # Assert surface area of hyperspherical cap For some CDFs in [.05,.45], # (h must be greater than 0 for the hypersphere cap surface area # calculation to hold). for survival_rate in 0.95, .9, .75, .6: cdf = (1 - survival_rate) mean_dir = self.evaluate(vmf.mean_direction) dotprods = np.sum(samples * mean_dir, -1) # Empirical estimate of the effective dot-product of the threshold that # selects for a given CDF level, that is the cosine of the largest # passable angle, or the minimum cosine for a within-CDF sample. dotprod_thresh = np.percentile(dotprods, 100 * survival_rate, axis=0, keepdims=True) dotprod_above_thresh = np.float32(dotprods > dotprod_thresh) sphere_cap_surface_area_ests = ( cdf * (importance * dotprod_above_thresh).sum(0) / dotprod_above_thresh.sum(0)) h = (1 - dotprod_thresh) self.assertGreaterEqual(h.min(), 0) # h must be >= 0 for the eqn below true_sphere_cap_surface_area = ( 0.5 * true_sphere_surface_area * self.evaluate(tf.betainc((dim - 1) / 2, 0.5, 2 * h - h**2))) if dim == 3: # For 3-d we have a simpler form we can double-check. self.assertAllClose(2 * np.pi * h, true_sphere_cap_surface_area) self.assertAllClose(true_sphere_cap_surface_area, sphere_cap_surface_area_ests + np.zeros_like(true_sphere_cap_surface_area), rtol=rtol)
def log_cond_prob(self, num_samples, outputs, latent_samples, sample_means, sample_vars, weights_samples, means_w, covars_weights, *args): full_covar = init_list(0.0, [self.num_latent]) for q in range(self.num_latent): covar_input = covars_weights[q, :, :] full_covar[q] = tf.matmul(covar_input, tf.transpose(covar_input)) full_covar = tf.stack(full_covar) ### NOTE ### USE WITH TRUNCATION # Improve identifiability # full_covar = tf.matrix_band_part(full_covar, -1, 0) # means_w = tf.matrix_band_part(means_w, -1, 0) #cov_diagonal = tf.matrix_band_part(tf.transpose(tf.matrix_diag_part(full_covar)), -1, 0) #var_w = cov_diagonal means_w = tf.transpose(means_w) cov_diagonal = tf.transpose(tf.matrix_diag_part(full_covar)) var_w = cov_diagonal prediction = init_list(0.0, [self.num_tasks]) outputs_no_missing = tf.boolean_mask(outputs, self.ytrain_non_missing_index) product_exp = tf.transpose( tf.matmul(means_w, tf.transpose(sample_means)) + self.offsets) means_w = tf.expand_dims(means_w, axis=2), var_w = tf.expand_dims(var_w, axis=2) sample_means = tf.transpose(tf.tile( tf.expand_dims(sample_means, axis=0), [self.num_tasks, 1, 1]), perm=[0, 2, 1]) sample_vars = tf.transpose(tf.tile(tf.expand_dims(sample_vars, axis=0), [self.num_tasks, 1, 1]), perm=[0, 2, 1]) first_term = tf.reduce_prod(tf.exp( (tf.multiply(means_w, sample_means)[0] + (tf.multiply(tf.square(means_w), sample_vars)[0] + tf.multiply(var_w, tf.square(sample_means))) / 2) / ((1.0 - tf.multiply(var_w, sample_vars)))), axis=1) second_term = tf.reduce_prod( 1.0 / tf.sqrt(1.0 - tf.multiply(var_w, sample_vars)), axis=1) prediction = tf.transpose(first_term * second_term * tf.exp(self.offsets)) prediction_no_missing = tf.boolean_mask(prediction, self.ytrain_non_missing_index) product_exp_no_missing = tf.boolean_mask(product_exp, self.ytrain_non_missing_index) log_lik = product_exp_no_missing * outputs_no_missing - prediction_no_missing - tf.lgamma( outputs_no_missing + 1.0) ell = tf.reduce_sum(log_lik) return (ell, sample_means, sample_vars, means_w, var_w, self.offsets)
def test_Lgamma(self):
    t = tf.lgamma(self.random(4, 3))
    self.check(t)
def _log_normalization(self):
    return (tf.lgamma(self.concentration) +
            0.5 * math.log(2. * math.pi) -
            self.concentration * tf.log(self.rate) -
            0.5 * tf.log(self._lambda))
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, dropout_in): # encoding mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1") mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2") mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3") mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4") z1 = distributions.Normal(loc=mu1, scale=sigma1) z2 = distributions.Normal(loc=mu2, scale=sigma2) z3 = distributions.Normal(loc=mu3, scale=sigma3) z4 = distributions.Normal(loc=mu4, scale=sigma4) init_min = 0.1 init_max = 0.1 init_min = (np.log(init_min) - np.log(1. - init_min)) init_max = (np.log(init_max) - np.log(1. - init_max)) dropout_a = tf.get_variable(name='dropout', shape=None, initializer=tf.random_uniform( (1,), init_min, init_max), dtype=tf.float32, trainable=True) dropout_p = tf.nn.sigmoid(dropout_a[0]) eps = 1e-7 temp = 0.1 unif_noise = tf.random_uniform(shape=[batch_size,4]) drop_prob = ( tf.log(dropout_p + eps) - tf.log(1. - dropout_p + eps) + tf.log(unif_noise + eps) - tf.log(1. - unif_noise + eps) ) drop_prob = tf.nn.sigmoid(drop_prob / temp) random_tensor = 1. - drop_prob retain_prob = 1. - dropout_p dropout_samples = random_tensor / retain_prob dropout_regularizer = dropout_p * tf.log(dropout_p) dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p) dropout_regularizer *= dropout_regularizer *10 * -1 dropout_regularizer = tf.clip_by_value(dropout_regularizer,-10,0) sum1 = mix1 + mix2 + mix3 + mix4 mix1 = mix1 / sum1 mix2 = mix2 / sum1 mix3 = mix3 / sum1 mix4 = mix4 / sum1 mix = tf.concat([mix1, mix2, mix3, mix4], 1) mix_parameters = mix dist = tf.distributions.Dirichlet(mix) mix_samples = dist.sample() mix = mix_samples mix_dropout1 = dropout_samples[:,0:1]*mix_samples[:,0:1] mix_dropout2 = dropout_samples[:,1:2]*mix_samples[:,1:2] mix_dropout3 = dropout_samples[:,2:3]*mix_samples[:,2:3] mix_dropout4 = dropout_samples[:,3:4]*mix_samples[:,3:4] sum1 = mix_dropout1+mix_dropout2+mix_dropout3+mix_dropout4 mix_dropout1 = mix_dropout1/sum1 mix_dropout2 = mix_dropout2/sum1 mix_dropout3 = mix_dropout3/sum1 mix_dropout4 = mix_dropout4/sum1 # sampling by re-parameterization technique # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32) z1_samples = z1.sample() z2_samples = z2.sample() z3_samples = z3.sample() z4_samples = z4.sample() ttf = [] ttf.append(z1_samples) ttf.append(z2_samples) ttf.append(z3_samples) ttf.append(z4_samples) dHSIC_Value = dHSIC(ttf) # decoding y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1") y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2") y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3") y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4") # dropout out y1 = y1 * mix_dropout1 y2 = y2 * mix_dropout2 y3 = y3 * mix_dropout3 y4 = y4 * mix_dropout4 y = y1 + y2 + y3 + y4 output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final") y = output m1 = np.zeros(dim_z, dtype=np.float32) m1[:] = 0 v1 = np.zeros(dim_z, dtype=np.float32) v1[:] = 1 # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32), # scale=np.ones(dim_z, dtype=np.float32)) p_z1 = distributions.Normal(loc=m1, scale=v1) m2 = np.zeros(dim_z, dtype=np.float32) m2[:] = 0 v2 = np.zeros(dim_z, dtype=np.float32) v2[:] = 1 p_z2 = distributions.Normal(loc=m2, scale=v2) m3 = np.zeros(dim_z, dtype=np.float32) 
m3[:] = 0 v3 = np.zeros(dim_z, dtype=np.float32) v3[:] = 1 p_z3 = distributions.Normal(loc=m3, scale=v3) m4 = np.zeros(dim_z, dtype=np.float32) m4[:] = 0 v4 = np.zeros(dim_z, dtype=np.float32) v4[:] = 1 p_z4 = distributions.Normal(loc=m4, scale=v4) kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1)) kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1)) kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1)) kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1)) KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0 # loss marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1) marginal_likelihood = tf.reduce_mean(marginal_likelihood) # KL divergence between two Dirichlet distributions a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8) a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4)) r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1) a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum( tf.lgamma(a1), axis=1) + r kl = a kl = tf.reduce_mean(kl) p1 = 1 p2 = 1 p4 = 1 ELBO = marginal_likelihood - KL_divergence * p2 loss = -ELBO + kl * p1 + p4 * dHSIC_Value + dropout_regularizer z = z1_samples return y, z, loss, -marginal_likelihood, dropout_regularizer,dropout_p,dropout_samples
def negative_binomial(m, Y, alpha):
    k = 1 / alpha
    return tf.lgamma(k + Y) - tf.lgamma(Y + 1) - tf.lgamma(k) + \
        Y * tf.log(m / (m + k)) - k * tf.log(1 + m * alpha)
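# The (m, alpha) parameterization above is scipy.stats.nbinom with n = 1/alpha
# and p = n / (m + n); a small NumPy sketch of that mapping (illustrative
# values only).
import numpy as np
from scipy import stats
from scipy.special import gammaln

Y = np.array([0., 2., 6.])
m, alpha = 4.0, 0.5
k = 1.0 / alpha
log_lik = (gammaln(k + Y) - gammaln(Y + 1) - gammaln(k)
           + Y * np.log(m / (m + k)) - k * np.log(1 + m * alpha))
np.testing.assert_allclose(log_lik, stats.nbinom.logpmf(Y, k, k / (m + k)), rtol=1e-6)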
def poisson(lamb, y):
    return y * tf.log(lamb) - lamb - tf.lgamma(y + 1.)
def variational_expectations(self, Fmu, Fvar, Y):
    if self.invlink is tf.exp:
        return -self.shape * Fmu - tf.lgamma(self.shape) \
            + (self.shape - 1.) * tf.log(Y) - Y * tf.exp(-Fmu + Fvar / 2.)
    else:
        return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
def variational_expectations(self, Fmu, Fvar, Y):
    if self.invlink is tf.exp:
        return Y * Fmu - tf.exp(Fmu + Fvar / 2) - tf.lgamma(Y + 1)
    else:
        return Likelihood.variational_expectations(self, Fmu, Fvar, Y)
def _log_normalization(self):
    return (tf.lgamma(self.concentration) -
            self.concentration * tf.log(self.rate))
def poisson_loss(y_true, y_pred):
    # mask = tf.cast(y_true>0,tf.float32)
    y_true = y_true + 0.001
    NLL = tf.lgamma(y_pred + 1) - y_pred * tf.log(y_true)
    return tf.reduce_mean(NLL)
def mean_poisson_log_likelihood(y_true, y_pred):
    # Poisson negative log-likelihood per element; `lgamma` is assumed to be
    # brought into scope from the backend (e.g. tf.lgamma).
    poisson = y_pred - y_true * K.log(y_pred + K.epsilon()) + lgamma(y_true + 1)
    return K.mean(poisson, axis=-1)
def bezier_net(self, inputs, n_points, bezier_degree): depth_cpw = 32 * 8 dim_cpw = int((bezier_degree + 1) / 8) kernel_size = (4, 3) # noise_std = 0.01 cpw = tf.layers.dense(inputs, 1024) cpw = tf.layers.batch_normalization(cpw, momentum=0.9) cpw = tf.nn.leaky_relu(cpw, alpha=0.2) cpw = tf.layers.dense(cpw, dim_cpw * 3 * depth_cpw) cpw = tf.layers.batch_normalization(cpw, momentum=0.9) cpw = tf.nn.leaky_relu(cpw, alpha=0.2) cpw = tf.reshape(cpw, (-1, dim_cpw, 3, depth_cpw)) cpw = tf.layers.conv2d_transpose(cpw, int(depth_cpw / 2), kernel_size, strides=(2, 1), padding='same') cpw = tf.layers.batch_normalization(cpw, momentum=0.9) cpw = tf.nn.leaky_relu(cpw, alpha=0.2) # cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std) cpw = tf.layers.conv2d_transpose(cpw, int(depth_cpw / 4), kernel_size, strides=(2, 1), padding='same') cpw = tf.layers.batch_normalization(cpw, momentum=0.9) cpw = tf.nn.leaky_relu(cpw, alpha=0.2) # cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std) cpw = tf.layers.conv2d_transpose(cpw, int(depth_cpw / 8), kernel_size, strides=(2, 1), padding='same') cpw = tf.layers.batch_normalization(cpw, momentum=0.9) cpw = tf.nn.leaky_relu(cpw, alpha=0.2) # cpw += tf.random_normal(shape=tf.shape(cpw), stddev=noise_std) # Control points cp = tf.layers.conv2d( cpw, 1, (1, 2), padding='valid') # batch_size x (bezier_degree+1) x 2 x 1 cp = tf.nn.tanh(cp) cp = tf.squeeze( cp, axis=-1, name='control_point') # batch_size x (bezier_degree+1) x 2 # Weights w = tf.layers.conv2d(cpw, 1, (1, 3), padding='valid') w = tf.nn.sigmoid(w) # batch_size x (bezier_degree+1) x 1 x 1 w = tf.squeeze(w, axis=-1, name='weight') # batch_size x (bezier_degree+1) x 1 # Parameters at data points db = tf.layers.dense(inputs, 1024) db = tf.layers.batch_normalization(db, momentum=0.9) db = tf.nn.leaky_relu(db, alpha=0.2) db = tf.layers.dense(db, 256) db = tf.layers.batch_normalization(db, momentum=0.9) db = tf.nn.leaky_relu(db, alpha=0.2) db = tf.layers.dense(db, n_points - 1) db = tf.nn.softmax(db) # batch_size x (n_data_points-1) ub = tf.pad(db, [[0, 0], [1, 0]], constant_values=0) # batch_size x n_data_points ub = tf.cumsum(ub, axis=1) ub = tf.minimum(ub, 1) ub = tf.expand_dims(ub, axis=-1) # 1 x n_data_points x 1 # Bezier layer # Compute values of basis functions at data points num_control_points = bezier_degree + 1 lbs = tf.tile(ub, [1, 1, num_control_points ]) # batch_size x n_data_points x n_control_points pw1 = tf.range(0, num_control_points, dtype=tf.float32) pw1 = tf.reshape(pw1, [1, 1, -1]) # 1 x 1 x n_control_points pw2 = tf.reverse(pw1, axis=[-1]) lbs = tf.add(tf.multiply(pw1, tf.log(lbs + EPSILON)), tf.multiply(pw2, tf.log(1 - lbs + EPSILON)) ) # batch_size x n_data_points x n_control_points lc = tf.add(tf.lgamma(pw1 + 1), tf.lgamma(pw2 + 1)) lc = tf.subtract( tf.lgamma(tf.cast(num_control_points, dtype=tf.float32)), lc) # 1 x 1 x n_control_points lbs = tf.add(lbs, lc) # batch_size x n_data_points x n_control_points bs = tf.exp(lbs) # Compute data points cp_w = tf.multiply(cp, w) dp = tf.matmul(bs, cp_w) # batch_size x n_data_points x 2 bs_w = tf.matmul(bs, w) # batch_size x n_data_points x 1 dp = tf.div(dp, bs_w) # batch_size x n_data_points x 2 dp = tf.expand_dims( dp, axis=-1, name='x_fake') # batch_size x n_data_points x 2 x 1 return dp, cp, w
def log_gamma(x, a1, b1):
    yout = tf.reduce_sum(
        a1 * tf.log(b1) - tf.lgamma(a1) + (a1 - 1.) * tf.log(x) - b1 * x)
    return yout
def build_loss(self, seqs_repr, data_ops): """Convert per-location real-valued predictions to a loss.""" # targets tstart = self.batch_buffer // self.target_pool tend = (self.batch_length - self.batch_buffer) // self.target_pool targets = data_ops['label'] targets = tf.identity(targets[:, tstart:tend, :], name='targets_op') # work-around for specifying my own predictions self.preds_adhoc = tf.placeholder(tf.float32, shape=seqs_repr.shape, name='preds-adhoc') # choose link if self.link in ['identity', 'linear']: self.preds_op = tf.identity(seqs_repr, name='preds') elif self.link == 'relu': self.preds_op = tf.relu(seqs_repr, name='preds') elif self.link == 'exp': self.preds_op = tf.exp(tf.clip_by_value(seqs_repr, -50, 50), name='preds') elif self.link == 'exp_linear': self.preds_op = tf.where(seqs_repr > 0, seqs_repr + 1, tf.exp( tf.clip_by_value(seqs_repr, -50, 50)), name='preds') elif self.link == 'softplus': self.preds_op = tf.nn.softplus(seqs_repr, name='preds') elif self.link == 'softmax': # performed in the loss function, but saving probabilities self.preds_prob = tf.nn.softmax(seqs_repr, name='preds') else: print('Unknown link function %s' % self.link, file=sys.stderr) exit(1) # clip if self.target_clip is not None: self.preds_op = tf.clip_by_value(self.preds_op, 0, self.target_clip) targets = tf.clip_by_value(targets, 0, self.target_clip) # sqrt if self.target_sqrt: self.preds_op = tf.sqrt(self.preds_op) targets = tf.sqrt(targets) loss_op = None loss_adhoc = None # choose loss if self.loss == 'gaussian': loss_op = tf.squared_difference(self.preds_op, targets) loss_adhoc = tf.squared_difference(self.preds_adhoc, targets) elif self.loss == 'poisson': loss_op = tf.nn.log_poisson_loss(targets, tf.log(self.preds_op), compute_full_loss=True) loss_adhoc = tf.nn.log_poisson_loss(targets, tf.log(self.preds_adhoc), compute_full_loss=True) elif self.loss == 'negative_binomial': # define overdispersion alphas self.alphas = tf.get_variable( 'alphas', shape=[self.num_targets], initializer=tf.constant_initializer(-5), dtype=tf.float32) self.alphas = tf.nn.softplus(tf.clip_by_value( self.alphas, -50, 50)) tf.summary.histogram('alphas', self.alphas) for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'): tf.summary.scalar('alpha_t%d' % ti, self.alphas[ti]) # compute w/ inverse k = 1. 
/ self.alphas # expand k k_expand = tf.tile(k, [self.batch_size * seq_length]) k_expand = tf.reshape( k_expand, (self.batch_size, seq_length, self.num_targets)) # expand lgamma(k) lgk_expand = tf.tile(tf.lgamma(k), [self.batch_size * seq_length]) lgk_expand = tf.reshape( lgk_expand, (self.batch_size, seq_length, self.num_targets)) # construct loss loss1 = targets * tf.log(self.preds_op / (self.preds_op + k_expand)) loss2 = k_expand * tf.log(k_expand / (self.preds_op + k_expand)) loss3 = tf.lgamma(targets + k_expand) - lgk_expand loss_op = -(loss1 + loss2 + loss3) # adhoc loss1 = targets * tf.log(self.preds_adhoc / (self.preds_adhoc + k_expand)) loss2 = k_expand * tf.log(k_expand / (self.preds_adhoc + k_expand)) loss_adhoc = -(loss1 + loss2 + loss3) elif self.loss == 'negative_binomial_hilbe': # define overdispersion alphas self.alphas = tf.get_variable( 'alphas', shape=[self.num_targets], initializer=tf.constant_initializer(-5), dtype=tf.float32) self.alphas = tf.exp(tf.clip_by_value(self.alphas, -50, 50)) # expand alphas_expand = tf.tile(self.alphas, [self.batch_size * seq_length]) alphas_expand = tf.reshape( alphas_expand, (self.batch_size, seq_length, self.num_targets)) # construct loss loss1 = targets * tf.log(self.preds_op) loss2 = (alphas_expand * targets + 1) / alphas_expand loss3 = tf.log(alphas_expand * self.preds_op + 1) loss_op = -loss1 + loss2 * loss3 # adhoc loss1 = targets * tf.log(self.preds_adhoc) loss3 = tf.log(alphas_expand * self.preds_adhoc + 1) loss_adhoc = -loss1 + loss2 * loss3 elif self.loss == 'gamma': # jchan document loss_op = targets / self.preds_op + tf.log(self.preds_op) loss_adhoc = targets / self.preds_adhoc + tf.log(self.preds_adhoc) elif self.loss == 'cross_entropy': loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=(targets - 1), logits=self.preds_op) loss_adhoc = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=(targets - 1), logits=self.preds_adhoc) else: print('Cannot identify loss function %s' % self.loss) exit(1) # set NaN's to zero # loss_op = tf.boolean_mask(loss_op, tf.logical_not(self.targets_na[:,tstart:tend])) # reduce lossses by batch and position loss_op = tf.reduce_mean(loss_op, axis=[0, 1], name='target_loss') loss_op = tf.check_numerics(loss_op, 'Invalid loss', name='loss_check') loss_adhoc = tf.reduce_mean(loss_adhoc, axis=[0, 1], name='target_loss_adhoc') tf.summary.histogram('target_loss', loss_op) for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'): tf.summary.scalar('loss_t%d' % ti, loss_op[ti]) self.target_losses = loss_op self.target_losses_adhoc = loss_adhoc # define target sigmas """ self.target_sigmas = tf.get_variable('target_sigmas', shape=[self.num_targets], initializer=tf.constant_initializer(2), dtype=tf.float32) self.target_sigmas = tf.nn.softplus(tf.clip_by_value(self.target_sigmas,-50,50)) tf.summary.histogram('target_sigmas', self.target_sigmas) for ti in np.linspace(0,self.num_targets-1,10).astype('int'): tf.summary.scalar('sigma_t%d'%ti, self.target_sigmas[ti]) # self.target_sigmas = tf.ones(self.num_targets) / 2. 
""" # dot losses target sigmas # loss_op = loss_op / (2*self.target_sigmas) # loss_adhoc = loss_adhoc / (2*self.target_sigmas) # fully reduce loss_op = tf.reduce_mean(loss_op, name='loss') loss_adhoc = tf.reduce_mean(loss_adhoc, name='loss_adhoc') # add extraneous terms loss_op += self.weights_regularizers # + tf.reduce_mean(tf.log(self.target_sigmas)) loss_adhoc += self.weights_regularizers # + tf.reduce_mean(tf.log(self.target_sigmas)) # track tf.summary.scalar('loss', loss_op) self.targets_op = targets return loss_op, loss_adhoc
if Layers >= 3:
    theta3_s = tf.stop_gradient(theta3)
    theta3T_s = tf.stop_gradient(theta3T)
    theta_alpha3_s = tf.stop_gradient(theta_alpha3)
    theta_beta3_s = tf.stop_gradient(theta_beta3)
    c4_s = tf.stop_gradient(c4)
    c_alpha4_s = tf.stop_gradient(c_alpha4)
    c_beta4_s = tf.stop_gradient(c_beta4)
    phi3_s = tf.stop_gradient(phi3)
    phi_alpha3_s = tf.stop_gradient(phi_alpha3)
    r_s = tf.stop_gradient(r)
    r_alpha_s = tf.stop_gradient(r_alpha)
    r_beta_s = tf.stop_gradient(r_beta)

# Calculate ELBO
Loglike = tf.reduce_sum(x * tf.log(phi_theta1) - phi_theta1 -
                        tf.lgamma(x + 1.)) / (K[0] * batch_size)

# Layer 1
if Layers == 1:
    pta_phi_theta2 = tf.transpose(r)
    pta_phi_theta2T = tf.transpose(r)
else:
    pta_phi_theta2 = phi_theta2
    pta_phi_theta2T = phi_theta2T
pta_phi_theta2_s = tf.stop_gradient(pta_phi_theta2)
pta_phi_theta2T_s = tf.stop_gradient(pta_phi_theta2T)

ELBO = Loglike + log_gamma_minus(theta1T, pta_phi_theta2T_s, c2_s,
                                 theta_alpha1_s, theta_beta1_s) / (K[0] * batch_size)
ELBO = ELBO + log_dirichlet_minus(phi1, eta_l[1], phi_alpha1_s) / (K[0] * rho_mb * batch_size)
ELBO = ELBO + log_gamma_minus(theta1T_s, pta_phi_theta2T_s, c2,
                              theta_alpha1_s, theta_beta1_s) / (K[0] * batch_size) \
    + prior * log_gamma_minus(c2, e0, f0, c_alpha2_s, c_beta2_s) / (K[0] * batch_size)
def __init__(self, conf): """ Set up remaining layers, objective and loss functions, gradient compute and apply ops, network parameter synchronization ops, and summary ops. """ super(PolicyVNetwork, self).__init__(conf) self.beta = conf['args'].entropy_regularisation_strength self.use_recurrent = conf['args'].alg_type == 'a3c-lstm' with tf.name_scope(self.name): self.critic_target_ph = tf.placeholder( 'float32', [None], name='target') self.adv_actor_ph = tf.placeholder("float", [None], name='advantage') if self.use_recurrent: layer_name = 'lstm_layer' self.hidden_state_size = 256 with tf.variable_scope(self.name+'/'+layer_name) as vs: self.lstm_cell = CustomBasicLSTMCell(self.hidden_state_size, forget_bias=1.0) self.step_size = tf.placeholder(tf.float32, [1], name='step_size') self.initial_lstm_state = tf.placeholder( tf.float32, [1, 2*self.hidden_state_size], name='initital_state') ox_reshaped = tf.reshape(self.ox, [1,-1,256]) lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn( self.lstm_cell, ox_reshaped, initial_state=self.initial_lstm_state, sequence_length=self.step_size, time_major=False, scope=vs) self.ox = tf.reshape(lstm_outputs, [-1,256]) # Get all LSTM trainable params self.lstm_trainable_variables = [v for v in tf.trainable_variables() if v.name.startswith(vs.name)] # Final actor layer layer_name = 'softmax_policy4' self.wpi, self.bpi, self.output_layer_pi, self.log_output_layer_pi = layers.softmax_and_log_softmax( layer_name, self.ox, self.num_actions) # Compute Poisson x Discrete Gaussian self.t = tf.placeholder(tf.float32, num_actions) self.l = tf.placeholder(tf.float32, num_actions) k = np.array(list(range(10))) t_hat = tf.log(1+tf.exp(self.t)) l_hat = tf.log(1+tf.exp(self.l)) self.action_repeat_probs = tf.exp(-(k-l_hat)**2/(2*t_hat) - l_hat - tf.lgamma(k+1)) * l_hat**k self.log_action_repeat_probs = -(k-l_hat)**2/(2*t_hat) - l_hat - tf.lgamma(k+1) + k*tf.log(l_hat) self.selected_repeat = self.placeholder(tf.int32) self.selected_repeat_prob = self.action_repeat_probs[self.selected_repeat] # Entropy: ∑_a[-p_a ln p_a] self.output_layer_entropy = tf.reduce_sum( - 1.0 * tf.multiply( self.output_layer_pi * self.action_repeat_probs, self.log_output_layer_pi + self.log_action_repeat_probs ), axis=1) self.entropy = tf.reduce_mean(self.output_layer_entropy) # Final critic layer self.wv, self.bv, self.output_layer_v = layers.fc( 'fc_value4', self.ox, 1, activation='linear') self.params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) # Advantage critic self.adv_critic = tf.subtract(self.critic_target_ph, tf.reshape(self.output_layer_v, [-1])) # Actor objective # Multiply the output of the network by a one hot vector, 1 for the # executed action. This will make the non-regularised objective # term for non-selected actions to be zero. self.log_output_selected_action = tf.reduce_sum( (self.log_output_layer_pi + self.selected_repeat_prob) * self.selected_action_ph, axis=1 ) actor_objective_advantage_term = tf.multiply( self.log_output_selected_action, self.adv_actor_ph ) actor_objective_entropy_term = self.beta * self.output_layer_entropy self.actor_objective = -tf.reduce_mean( actor_objective_advantage_term + actor_objective_entropy_term ) # Critic loss if self.clip_loss_delta > 0: quadratic_part = tf.reduce_mean(tf.pow( tf.minimum( tf.abs(self.adv_critic), self.clip_loss_delta ), 2)) linear_part = tf.subtract(tf.abs(self.adv_critic), quadratic_part) #OBS! For the standard L2 loss, we should multiply by 0.5. 
However, the authors of the paper # recommend multiplying the gradients of the V function by 0.5. Thus the 0.5 self.critic_loss = tf.multiply(tf.constant(0.5), tf.nn.l2_loss(quadratic_part) + \ self.clip_loss_delta * linear_part) else: self.critic_loss = 0.5 * tf.reduce_mean(tf.pow(self.adv_critic, 2)) self.loss = self.actor_objective + self.critic_loss # Optimizer grads = tf.gradients(self.loss, self.params) if self.clip_norm_type == 'ignore': # Unclipped gradients self.get_gradients = grads elif self.clip_norm_type == 'global': # Clip network grads by network norm self.get_gradients = tf.clip_by_global_norm( grads, self.clip_norm)[0] elif self.clip_norm_type == 'local': # Clip layer grads by layer norm self.get_gradients = [tf.clip_by_norm( g, self.clip_norm) for g in grads] # Placeholders for shared memory vars self.params_ph = [] for p in self.params: self.params_ph.append(tf.placeholder(tf.float32, shape=p.get_shape(), name="shared_memory_for_{}".format( (p.name.split("/", 1)[1]).replace(":", "_")))) # Ops to sync net with shared memory vars self.sync_with_shared_memory = [] for i in range(len(self.params)): self.sync_with_shared_memory.append( self.params[i].assign(self.params_ph[i]))
def gammaln(self, x):
    return tf.lgamma(x)
def poisson(x, lam):
    return x * tf.log(lam) - lam - tf.lgamma(x + 1.)
def main(): argc = len(sys.argv) if (argc != 2): print("Usage: [Data.csv]") exit() ## Read Data from file into a data frame DATA = sys.argv[1] data_df = pd.read_csv(DATA) num_data_columns = len(data_df.columns) num_dim = num_data_columns hypergeom_p = 1 hypergeom_q = hypergeom_p - 1 session_string = ''.join( random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) print( "Data appears to have {} columns. Assuming multivariate probability distribution with {} varaibles." .format(num_data_columns, num_data_columns)) log_file_string = "MELL_log_{}.txt".format(session_string) ## A random string for this session deatils ## Set to floating point data data = data_df.astype('float64') num_rows = data.shape[0] print("There are {} rows".format(num_rows)) ########################## ## Inputs and constants ## ########################## EPOCHS = 1 n_batches = 50 model_weight = 1.0 normalisation_weight = 100.0 #R_squared_weight = 1.0 #intercept_weight = 1.0 #gradient_weight = 100.0 # Create a placeholder to dynamically switch between batch sizes batch_size = tf.placeholder(tf.int64) drop_prob = tf.placeholder(tf.float32) ## Training Boolean training_bool = tf.placeholder(tf.bool) ################## ## Initializers ## ################## weight_initialiser_mean = 0.0 weight_initialiser_std = 0.000000001 ## Question do we need a separate initializer for everything? #weight_initer = tf.truncated_normal_initializer(mean=weight_initialiser_mean, stddev=weight_initialiser_std) #weight_initer = tf.constant_initializer(np.eye(num_gamma,num_dim),dtype=tf.float32) #avec_initer = tf.random_uniform_initializer(minval=0.90,maxval=1.1,dtype=tf.float32) #alpha_initer = tf.truncated_normal_initializer(mean=0.0, stddev=0.5) #beta_initer = tf.truncated_normal_initializer(mean=0.0, stddev=0.5) #q_initer = tf.constant_initializer(np.eye(num_dim),dtype=tf.float32) #matrix_M_init = tf.constant_initializer(np.eye(num_dim,num_dim),dtype=tf.float32) matrix_V_init = tf.truncated_normal_initializer( mean=weight_initialiser_mean, stddev=weight_initialiser_std) matrix_W_init = tf.truncated_normal_initializer( mean=weight_initialiser_mean, stddev=weight_initialiser_std) param_a_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.000000001) param_b_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.1) eta_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.1) ######################## ## Input Placeholders ## ######################## ## The data objects, x input data, S set of input exponents x = tf.placeholder(tf.float32, shape=[None, num_dim]) S = tf.placeholder(tf.float32, shape=[None, num_dim]) ########################## ## Restart placeholders ## For continuing and restarting if errors occur ########################## #eta_reset_input = tf.placeholder( tf.float32, shape=[num_dim]) #q_reset_input = tf.placeholder( tf.float32, shape=[num_dim,num_dim]) #w_reset_input = tf.placeholder( tf.float32, shape=[num_gamma, num_dim]) #alpha_reset_input = tf.placeholder( tf.float32, shape=[num_gamma]) #beta_reset_input = tf.placeholder( tf.float32, shape=[num_dim]) #a_reset_input = tf.placeholder( tf.float32, shape=[num_gamma]) ############################ ## Iterators and features ## ############################ ds_buffer_constant = 1000 dataset = tf.data.Dataset.from_tensor_slices(x).shuffle( buffer_size=ds_buffer_constant).batch(batch_size).repeat() iter = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes) features = iter.get_next() dataset_init_op = iter.make_initializer(dataset) 
print( "TO ADD: Scale parameter exponenets must equal variable exponenets in the series expansion via GRMT!" ) print( "TO ADD: Write best models to file as we go (periodically to not loose data) : Checkpointing" ) print("TO ADD: Parameter files : Check it works first!") print("TO ADD: Add initialiser constants to log") print("Put a condition on the sign of the moment?") print("Num gammas must be greater or equal to num dim") print( "Treat some of the gammas as a subset which are a function of the scale weights" ) print( "How does GMRT det A come into this? will there be an additional scaling factor?" ) print( "Complex network and ability to minimise the complex parts in real predictions? Then we can include the (-1)^(q.s+beta) kind of terms" ) print( "Start with a product of marginals (which may be quicker/easier to train) then start with an expression that represents that product for the full distribution" ) print("#####") print( "Check with simple function! How close can loss get to zero? Can we model this with a noise distribution?" ) print( "Can we find the s which maximises the error for a given function? Can we work on that s only?" ) print( "Consider a loss function that is signed for small errors such that they cancel if they are noise but do not if they are large?" ) print("For syymetry use the sum of the above function flipped!?") print("Check Autonorm is working correctly") print("Check Error expectations are working correctly") print("Check range 0-1 in s is acceptable?") ## Make a log of parameters with open(log_file_string, "w") as f: f.write("Session {} created {}\n".format(session_string, datetime.datetime.now())) f.write("Data of shape {}\n".format(data.shape)) f.write("Number of dimensions = {}\n".format(num_dim)) f.write("Hypergeometric p = {}\n".format(hypergeom_p)) f.write("Hypergeometric q = {}\n".format(hypergeom_q)) f.write("EPOCHS = {}\n".format(EPOCHS)) f.write("n_batches = {}\n".format(EPOCHS)) f.write("normalisation_weight = {}\n".format(normalisation_weight)) #f.write("intercept_weight = {}\n".format(intercept_weight)) #f.write("gradient_weight = {}\n".format(gradient_weight)) #f.write("R_squared_weight = {}\n".format(R_squared_weight)) f.write("ds_buffer_constant = {}\n".format(ds_buffer_constant)) ##################### ## Model Variables ## ##################### eta = tf.get_variable(name="eta", dtype=tf.float32, shape=[num_dim], initializer=eta_init) matrix_V = tf.get_variable(name="V", dtype=tf.float32, shape=[hypergeom_p, num_dim], initializer=matrix_V_init) matrix_W = tf.get_variable(name="W", dtype=tf.float32, shape=[hypergeom_q, num_dim], initializer=matrix_W_init) param_a = tf.get_variable(name="a", dtype=tf.float32, shape=[hypergeom_p], initializer=param_a_init) param_b = tf.get_variable(name="b", dtype=tf.float32, shape=[hypergeom_q], initializer=param_b_init) eta_input = tf.placeholder(dtype=tf.float32, shape=[num_dim]) param_a_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_p]) param_b_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_q]) matrix_V_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_p, num_dim]) matrix_W_input = tf.placeholder(dtype=tf.float32, shape=[hypergeom_q, num_dim]) print("CHECK: use locking status and effect?") eta_assign_op = tf.assign(eta, eta_input, use_locking=False) param_a_assign_op = tf.assign(param_a, param_a_input, use_locking=False) param_b_assign_op = tf.assign(param_b, param_b_input, use_locking=False) matrix_V_assign_op = tf.assign(matrix_V, matrix_V_input, use_locking=False) 
    matrix_W_assign_op = tf.assign(matrix_W, matrix_W_input, use_locking=False)

    #####################
    ## Model Equations ##
    #####################
    print("Warning: raw moments are being used!!")
    print("Optimisation: lgamma(s_norm) = np.zeros([num_dim])")
    print("Optimisation: tf.reduce_sum(np.zeros([num_dim])) = 0")
    print(
        "NOTE: param_a elements must be bigger than the row sums of V for positive arguments to loggamma(x)!"
    )
    print(
        "NOTE: param_b elements must be bigger than the row sums of W for positive arguments to loggamma(x)!"
    )

    ## Return the normalised moment prediction for the input S
    logXi_a = tf.reduce_sum(
        tf.lgamma(param_a))  ## Hypergeometric p normalisation coeffs
    logXi_b = tf.reduce_sum(
        tf.lgamma(param_b))  ## Hypergeometric q normalisation coeffs

    ## Things that depend on the exponent sample
    logXi_s = tf.map_fn(lambda s: tf.reduce_sum(tf.lgamma(s)),
                        S,
                        dtype=tf.float32)
    logXi_2s = tf.map_fn(lambda s: tf.reduce_sum(tf.lgamma(s)),
                         2.0 * S,
                         dtype=tf.float32)
    logXi_a_minus_V_s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s, axes=[[1], [0]]))),
                                  S,
                                  dtype=tf.float32)
    logXi_b_minus_W_s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s, axes=[[1], [0]]))),
                                  S,
                                  dtype=tf.float32)
    logXi_a_minus_V_2s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s, axes=[[1], [0]]))),
                                   2.0 * S,
                                   dtype=tf.float32)
    logXi_b_minus_W_2s = tf.map_fn(lambda s: tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s, axes=[[1], [0]]))),
                                   2.0 * S,
                                   dtype=tf.float32)

    ## Calculate the log of the analytic moments (as a vector over samples in S)
    #logM = logXi_b + logXi_a_minus_V_s + logXi_s - logXi_a - logXi_b_minus_W_s
    #logM2s = logXi_b + logXi_a_minus_V_2s + logXi_2s - logXi_a - logXi_b_minus_W_2s

    s_norm = tf.ones(num_dim)
    #logXi_s_norm = tf.reduce_sum(tf.lgamma(s_norm))
    logXi_a_minus_V_s_norm = tf.reduce_sum(
        tf.lgamma(param_a + tf.tensordot(matrix_V, s_norm, axes=[[1], [0]])))
    logXi_b_minus_W_s_norm = tf.reduce_sum(
        tf.lgamma(param_b + tf.tensordot(matrix_W, s_norm, axes=[[1], [0]])))

    norm_logM = logXi_b + logXi_a_minus_V_s_norm - logXi_a - logXi_b_minus_W_s_norm
    norm_M = tf.exp(
        norm_logM)  ## Get the raw moment for normalisation (dangerous?)

    logM_diff = logXi_a_minus_V_s + logXi_s - logXi_b_minus_W_s - logXi_a_minus_V_s_norm + logXi_b_minus_W_s_norm
    M = tf.exp(logM_diff)  ## Get the raw moment (dangerous?)

    logM2s_diff = logXi_a_minus_V_2s + logXi_2s - logXi_b_minus_W_2s - logXi_a_minus_V_s_norm + logXi_b_minus_W_s_norm
    M2s = tf.exp(logM2s_diff)  ## Get the raw moment (dangerous?)
    #std = tf.sqrt(M2s - tf.square(M))  ## Calculate the std
    #std_error = std/tf.sqrt(1.0*num_rows)  ## Divide by the number of samples to get the expected margin under this estimator
    M_var = tf.subtract(M2s, tf.square(M))
    #error_term = tf.divide(M2s - tf.square(M), 1.0*num_rows)

    print("Warning, messed with normalisation!!")
    norm_target = tf.constant(1.0, dtype=tf.float32)

    #########################################################################################
    ## Values Derived From Data (The things we are trying to fit the analytic function to) ##
    #########################################################################################
    data_exponents = tf.map_fn(lambda s: tf.subtract(s, tf.constant(1.0)), S)
    data_moments = tf.map_fn(lambda s: tf.pow(features, s), data_exponents)
    E = tf.map_fn(lambda mom: tf.reduce_mean(tf.reduce_prod(mom, axis=1)),
                  data_moments,
                  dtype=tf.float32)  ## Careful of overflow here

    ## Reduce to variance
    def reduce_var(x, axis=None, keepdims=False):
        m = tf.reduce_mean(x, axis=axis, keep_dims=True)
        devs_squared = tf.square(x - m)
        return tf.reduce_mean(devs_squared, axis=axis, keep_dims=keepdims)

    #E_var = tf.map_fn( lambda mom : reduce_var(tf.reduce_prod(mom,axis=1)), data_moments, dtype = tf.float32 )  ## Careful of overflow here
    #logE = tf.log(E)  ## Possibly useless

    ###################
    ## Loss function ##
    ###################
    ## Calculate the R^2 coefficient of the set of S datapoints
    #mean_log_E = tf.reduce_mean(logEfunc)
    #mean_log_M = tf.reduce_mean(logMfunc)
    #total_error = tf.reduce_sum(tf.square(tf.subtract(logEfunc,mean_log_E)))
    #unexplained_error = tf.reduce_sum(tf.square(tf.subtract(logEfunc, logMfunc)))
    #R_squared = tf.subtract(tf.constant(1.0,dtype=tf.float32), tf.divide(unexplained_error, total_error))
    #logM_error = tf.reduce_sum(tf.square(tf.subtract(logMfunc,mean_log_M)))
    #derived_gradient = tf.divide(tf.reduce_sum(tf.multiply(logM_error,total_error)),tf.reduce_sum(tf.multiply(logM_error,logM_error)))
    #derived_intercept = mean_log_E - tf.multiply(derived_gradient, mean_log_M)

    ## Define the losses here ##
    #intercept_loss = intercept_weight * tf.losses.mean_squared_error(derived_intercept,tf.constant(0.0,dtype=tf.float32))
    #gradient_loss = gradient_weight * tf.losses.mean_squared_error(derived_gradient,tf.constant(1.0,dtype=tf.float32))
    #R_squared_loss = R_squared_weight * tf.losses.mean_squared_error(R_squared,tf.constant(1.0,dtype=tf.float32))

    ## Get the loss associated with points beyond the standard error
    #model_loss = model_weight * tf.reduce_sum(tf.maximum(0.0, tf.square(M-E) - error_term ))
    #var_weight = tf.constant(1.0)
    model_loss = model_weight * tf.losses.mean_squared_error(M, E)
    norm_loss = normalisation_weight * tf.losses.mean_squared_error(
        norm_M, norm_target)
    #var_loss = tf.multiply(var_weight,tf.losses.mean_squared_error(M_var,E_var))
    #gamma_loss = integer_gamma_weight*tf.reduce_sum(tf.map_fn(lambda i : tf.abs(tf.abs(i)*(tf.abs(i)-1)),a))

    ## These require a definition of s which are allowed (can this be for s in 1 to 2?)
    #positive_argument_a_V_loss = tf. tf.max()
    #positive_argument_b_W_loss = ...
    ## Define the total loss
    loss = model_loss + norm_loss  #+ R_squared_loss + intercept_loss + gradient_loss

    print("Potential issue using one optimizer for different routines")
    optimiser = tf.train.AdamOptimizer()
    #optimiser = tf.train.GradientDescentOptimizer(0.001)

    ## Define training operations for independent sub-losses
    train_op = optimiser.minimize(loss)
    norm_op = optimiser.minimize(norm_loss)
    #model_op = optimiser.minimize(model_loss)
    #R_squared_op = optimiser.minimize(R_squared_loss)
    #gradient_op = optimiser.minimize(gradient_loss)
    #intercept_op = optimiser.minimize(intercept_loss)

    num_best_models = 1
    best_models_list = []
    largest_min_loss = 1e90

    def queue_compare(current_list, contender):
        clip_num = num_best_models
        if (len(current_list) < clip_num):
            current_list.append(contender)
        else:
            ## Search for the correct place, insert the contender there,
            ## then pop the end of the list until clip_num elements remain
            index = clip_num
            for i in range(len(current_list)):
                if (contender[0] < current_list[i][0]):
                    index = i
                    break
            if (i < clip_num):
                current_list.insert(index, contender)
            while (len(current_list) > clip_num):
                current_list.pop(-1)
        return sorted(current_list, key=lambda x: x[0])

    ## TF graph write for chart viewing
    writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())

    num_train_exponent_samples = 200

    def get_exponent_samples():
        samps = np.ones([num_dim]) + np.random.uniform(
            0.0, 4.0, size=[num_train_exponent_samples, num_dim])
        drop_prob = 0.2
        for i in range(len(samps)):
            for j in range(len(samps[i])):
                if (np.random.uniform(0, 1) < drop_prob):
                    samps[i][j] = 1.0
        return samps

    #############
    ## Session ##
    #############
    epoch_min_max_thresholds = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCHS):
            if (epoch % 5 == 0):
                s_list = get_exponent_samples()
            sess.run(dataset_init_op,
                     feed_dict={
                         x: data,
                         S: s_list,
                         batch_size: num_rows,
                         drop_prob: 0.5,
                         training_bool: True
                     })
            tot_loss = 0.0
            for _ in range(n_batches):
                #_, loss_value, prlogE, prlogM, prnorm_logM, ml, nl, rl, il, grl = sess.run([train_op, loss, logE, logM, norm_logM, model_loss, norm_loss, R_squared_loss, intercept_loss, gradient_loss], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
                #_, loss_value, prnorm_M, ml, nl = sess.run([train_op, loss, norm_M, model_loss, norm_loss], feed_dict={ S:s_list, batch_size : num_rows, drop_prob : 0.5, training_bool : True})
                loss_value, prnorm_M, ml, nl = sess.run(
                    [loss, norm_M, model_loss, norm_loss],
                    feed_dict={
                        S: s_list,
                        batch_size: num_rows,
                        drop_prob: 0.5,
                        training_bool: True
                    })
                tot_loss += loss_value
            print("Iter: {}, Loss: {:.6f}".format(epoch,
                                                  tot_loss / n_batches))
            print("norm M = {}".format(prnorm_M))
            print("model, norm = {},{}".format(ml, nl))
            #print(test)
            #maxl = max(ml,nl,rl,il,grl)
            if (not np.isnan(tot_loss)
                    and tot_loss / n_batches < largest_min_loss):
                b_eta, b_W, b_V, b_a, b_b = sess.run(
                    [eta, matrix_W, matrix_V, param_a, param_b])
                best_models_list = queue_compare(
                    best_models_list,
                    [tot_loss / n_batches, [b_eta, b_W, b_V, b_a, b_b]])
                largest_min_loss = max(
                    [element[0] for element in best_models_list])
                smallest_min_loss = min(
                    [element[0] for element in best_models_list])
                print("BEST VALUE ADDED! Threshold = {} to {}".format(
                    smallest_min_loss, largest_min_loss))
                epoch_min_max_thresholds.append(
                    (epoch, smallest_min_loss, largest_min_loss))
            if (np.isnan(tot_loss)):
                print("\n\n RESETTING \n\n")
                sess.run(tf.global_variables_initializer())
                sess.run(eta_assign_op,
                         feed_dict={eta_input: best_models_list[0][1][0]})
                sess.run(matrix_W_assign_op,
                         feed_dict={matrix_W_input: best_models_list[0][1][1]})
                sess.run(matrix_V_assign_op,
                         feed_dict={matrix_V_input: best_models_list[0][1][2]})
                sess.run(param_a_assign_op,
                         feed_dict={param_a_input: best_models_list[0][1][3]})
                sess.run(param_b_assign_op,
                         feed_dict={param_b_input: best_models_list[0][1][4]})
            #elif(maxl==nl):
            #    for _ in range(10): sess.run([norm_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==rl):
            #    for _ in range(10): sess.run([R_squared_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==ml):
            #    for _ in range(10): sess.run([model_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==il):
            #    for _ in range(10): sess.run([intercept_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})
            #elif(maxl==grl):
            #    for _ in range(10): sess.run([gradient_op], feed_dict={ S:s_list, drop_prob : 0.5, training_bool : True})

        print("Run finished. Writing files...")
        with open("Output_Mell_{}.txt".format(session_string), "w") as f:
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                E_obs, M_obs = sess.run([E, M],
                                        feed_dict={
                                            S: s_list,
                                            drop_prob: 0.5,
                                            training_bool: True
                                        })
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                f.write("{},{}\n".format(E_obs[0], M_obs[0]))

        with open("Output_Mell_{}_Best_100_Models.txt".format(session_string),
                  "w") as f:
            for i in best_models_list:
                f.write("{}:{}\n".format(i[0], i[1]))

        ## Load the best model back in
        model = 0
        sess.run(eta_assign_op,
                 feed_dict={eta_input: best_models_list[model][1][0]})
        sess.run(matrix_W_assign_op,
                 feed_dict={matrix_W_input: best_models_list[model][1][1]})
        sess.run(matrix_V_assign_op,
                 feed_dict={matrix_V_input: best_models_list[model][1][2]})
        sess.run(param_a_assign_op,
                 feed_dict={param_a_input: best_models_list[model][1][3]})
        sess.run(param_b_assign_op,
                 feed_dict={param_b_input: best_models_list[model][1][4]})

        with open("Output_Best_Mell_{}.txt".format(session_string), "w") as f:
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                E_obs, M_obs = sess.run([E, M],
                                        feed_dict={
                                            S: s_list,
                                            drop_prob: 0.5,
                                            training_bool: True
                                        })
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                f.write("{},{}\n".format(E_obs[0], M_obs[0]))

        ### Make a huge array of predictions using the best 100 models
        with open("Output_Best_Models_Mell_{}.txt".format(session_string),
                  "w") as f:
            for i in range(num_dim):
                f.write("s{},".format(i))
            f.write("E,")
            for i in range(num_best_models):
                f.write("M{},".format(i))
            f.write("\n")
            for i in range(500):
                s_list = [[1.0]] + np.random.exponential(scale=2.0,
                                                         size=[1, num_dim])
                for sval in s_list[0]:
                    f.write("{},".format(sval))
                E_obs = sess.run(E,
                                 feed_dict={
                                     S: s_list,
                                     drop_prob: 0.5,
                                     training_bool: True
                                 })
                f.write("{},".format(E_obs[0]))
                for model in range(num_best_models):
                    sess.run(
                        eta_assign_op,
                        feed_dict={eta_input: best_models_list[model][1][0]})
                    sess.run(matrix_W_assign_op,
                             feed_dict={
                                 matrix_W_input: best_models_list[model][1][1]
                             })
                    sess.run(matrix_V_assign_op,
                             feed_dict={
                                 matrix_V_input: best_models_list[model][1][2]
                             })
                    sess.run(param_a_assign_op,
                             feed_dict={
                                 param_a_input: best_models_list[model][1][3]
                             })
                    sess.run(param_b_assign_op,
                             feed_dict={
                                 param_b_input: best_models_list[model][1][4]
                             })
                    M_obs = sess.run(M,
                                     feed_dict={
                                         S: s_list,
                                         drop_prob: 0.5,
                                         training_bool: True
                                     })
                    f.write("{},".format(M_obs[0]))
                f.write("\n")

        ### Print parameters of best model into a file for reading
        with open("Output_params_{}.txt".format(session_string), "w") as f:
            for param in sess.run([eta, matrix_W, matrix_V, param_a, param_b]):
                f.write("{}\n".format(param))
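# A minimal NumPy sketch of the two quantities the MSE loss above compares: the
# empirical Mellin-type moment E[prod_i x_i^(s_i - 1)] of the data (the `E`
# tensor) and the lgamma-based analytic moment from the commented `logM` line,
# here for hypergeom_q = 0 so the `b`/`W` terms vanish. The array names
# (`toy_data`, `s_vec`, `a_vec`, `V_mat`) are illustrative stand-ins, not
# objects from the script above.
import numpy as np
from scipy.special import gammaln

rng = np.random.default_rng(0)
num_dim, hypergeom_p = 2, 1
toy_data = rng.gamma(shape=2.0, scale=1.0, size=(1000, num_dim))
s_vec = np.array([1.5, 2.0])
a_vec = np.full(hypergeom_p, 5.0)             # plays the role of param_a
V_mat = np.full((hypergeom_p, num_dim), 0.5)  # plays the role of matrix_V

# Empirical moment, as in `E` above: mean over rows of prod_i x_i^(s_i - 1).
E_emp = np.mean(np.prod(toy_data ** (s_vec - 1.0), axis=1))

# Analytic log-moment, as in the commented `logM` line (q = 0 case):
# sum_j lgamma(a_j + (V s)_j) + sum_i lgamma(s_i) - sum_j lgamma(a_j).
logM = (np.sum(gammaln(a_vec + V_mat @ s_vec))
        + np.sum(gammaln(s_vec))
        - np.sum(gammaln(a_vec)))

print(E_emp, np.exp(logM))  # the model_loss above drives these two together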
def _ais_gets_correct_log_normalizer(self, init, independent_chain_ndims,
                                     sess, feed_dict=None):
    counter = collections.Counter()

    def proposal_log_prob(x):
        counter['proposal_calls'] += 1
        event_dims = tf.range(independent_chain_ndims, tf.rank(x))
        return tf.reduce_sum(tfd.Normal(loc=0., scale=1.).log_prob(x),
                             axis=event_dims)

    def target_log_prob(x):
        counter['target_calls'] += 1
        event_dims = tf.range(independent_chain_ndims, tf.rank(x))
        return self._log_gamma_log_prob(x, event_dims)

    if feed_dict is None:
        feed_dict = {}

    num_steps = 200

    def make_kernel(tlp_fn):
        return tfp.mcmc.HamiltonianMonteCarlo(target_log_prob_fn=tlp_fn,
                                              step_size=0.5,
                                              num_leapfrog_steps=2,
                                              seed=45)

    _, ais_weights, _ = tfp.mcmc.sample_annealed_importance_chain(
        num_steps=num_steps,
        proposal_log_prob_fn=proposal_log_prob,
        target_log_prob_fn=target_log_prob,
        current_state=init,
        make_kernel_fn=make_kernel,
        parallel_iterations=1)

    # We have three calls because the calculation of `ais_weights` entails
    # another call to the `convex_combined_log_prob_fn`. We could refactor
    # things to avoid this, if needed (eg, b/72994218).
    self.assertAllEqual(dict(target_calls=3, proposal_calls=3), counter)

    event_shape = tf.shape(init)[independent_chain_ndims:]
    event_size = tf.reduce_prod(event_shape)

    log_true_normalizer = (-self._shape_param * tf.log(self._rate_param) +
                           tf.lgamma(self._shape_param))
    log_true_normalizer *= tf.cast(event_size, log_true_normalizer.dtype)

    log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights) -
                                np.log(num_steps))

    ratio_estimate_true = tf.exp(ais_weights - log_true_normalizer)
    ais_weights_size = tf.size(ais_weights)
    standard_error = tf.sqrt(
        _compute_sample_variance(ratio_estimate_true) /
        tf.cast(ais_weights_size, ratio_estimate_true.dtype))

    [
        ratio_estimate_true_,
        log_true_normalizer_,
        log_estimated_normalizer_,
        standard_error_,
        ais_weights_size_,
        event_size_,
    ] = sess.run([
        ratio_estimate_true,
        log_true_normalizer,
        log_estimated_normalizer,
        standard_error,
        ais_weights_size,
        event_size,
    ], feed_dict)

    tf.logging.vlog(
        1, '  log_true_normalizer: {}\n'
        '  log_estimated_normalizer: {}\n'
        '  ais_weights_size: {}\n'
        '  event_size: {}\n'.format(log_true_normalizer_,
                                    log_estimated_normalizer_,
                                    ais_weights_size_, event_size_))

    self.assertNear(ratio_estimate_true_.mean(), 1., 4. * standard_error_)
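# The "true" normalizer checked above: for an unnormalized Gamma log-density
# (shape - 1)*log(x) - rate*x applied independently to each event dimension,
# the log-normalizer is event_size * (lgamma(shape) - shape*log(rate)), i.e.
# the `log_true_normalizer` expression. A small NumPy/SciPy sketch of that
# bookkeeping plus an importance-sampling style estimate (logsumexp of
# log-weights minus log of the weight count); `log_weights` is a hypothetical
# stand-in array, not output from the test above.
import numpy as np
from scipy.special import gammaln, logsumexp

shape_param, rate_param, event_size = 2.0, 3.0, 4
log_true_normalizer = event_size * (gammaln(shape_param)
                                    - shape_param * np.log(rate_param))

log_weights = np.array([-1.2, -0.8, -1.0, -0.9])  # hypothetical AIS weights
log_estimated_normalizer = logsumexp(log_weights) - np.log(log_weights.size)

print(log_true_normalizer, log_estimated_normalizer)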
def _log_normalization(self):
    return (tf.lgamma(self.concentration1) + tf.lgamma(self.concentration0) -
            tf.lgamma(self.total_concentration))
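# With total_concentration being the sum of the two concentrations, the value
# returned above is the log Beta function,
#   log B(c1, c0) = lgamma(c1) + lgamma(c0) - lgamma(c1 + c0).
# A standalone SciPy cross-check of that identity (c1, c0 are arbitrary):
import numpy as np
from scipy.special import betaln, gammaln

c1, c0 = 2.5, 4.0
assert np.isclose(gammaln(c1) + gammaln(c0) - gammaln(c1 + c0),
                  betaln(c1, c0))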
def _multi_lgamma(self, a, p, name="multi_lgamma"):
    """Computes the log multivariate gamma function; log(Gamma_p(a))."""
    with self._name_scope(name, values=[a, p]):
        seq = self._multi_gamma_sequence(a, p)
        return (0.25 * p * (p - 1.) * math.log(math.pi) +
                tf.reduce_sum(tf.lgamma(seq), axis=[-1]))
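# The identity being computed is
#   log Gamma_p(a) = p(p-1)/4 * log(pi) + sum_{j=1..p} lgamma(a + (1 - j)/2),
# assuming _multi_gamma_sequence(a, p) yields a, a - 1/2, ..., a - (p - 1)/2.
# SciPy ships the same function, so a standalone NumPy check looks like:
import math
import numpy as np
from scipy.special import gammaln, multigammaln

a, p = 4.2, 3
seq = a + (1.0 - np.arange(1, p + 1)) / 2.0  # a, a - 1/2, a - 1
direct = 0.25 * p * (p - 1) * math.log(math.pi) + np.sum(gammaln(seq))
assert np.isclose(direct, multigammaln(a, p))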
def variational_expectations(self, Fmu, Fvar, Y):
    if self.invlink is tf.exp:
        return Y * Fmu - tf.exp(Fmu + Fvar / 2) * self.binsize \
            - tf.lgamma(Y + 1) + Y * tf.log(self.binsize)
    return super(Poisson, self).variational_expectations(Fmu, Fvar, Y)
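# Closed form used above: with the exp inverse link and bin size D,
#   E_{f ~ N(mu, var)}[ y*(f + log D) - D*exp(f) - log(y!) ]
#     = y*mu - D*exp(mu + var/2) - lgamma(y + 1) + y*log(D),
# since E[exp(f)] = exp(mu + var/2). A standalone NumPy sketch checking it
# against Gauss-Hermite quadrature (all values are arbitrary test inputs):
import numpy as np
from scipy.special import gammaln

mu, var, y, binsize = 0.3, 0.5, 4.0, 0.1

closed_form = (y * mu - binsize * np.exp(mu + var / 2.0)
               - gammaln(y + 1.0) + y * np.log(binsize))

nodes, weights = np.polynomial.hermite.hermgauss(50)
f = mu + np.sqrt(2.0 * var) * nodes
log_lik = y * (f + np.log(binsize)) - binsize * np.exp(f) - gammaln(y + 1.0)
quadrature = np.sum(weights * log_lik) / np.sqrt(np.pi)

assert np.isclose(closed_form, quadrature)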
def gamma_loss(z_over_a, log_alpha, alpha, log_beta, beta):
    loss = -alpha * log_beta + (alpha + z_over_a) * tf.log(1 + beta) \
        - tf.lgamma(alpha + z_over_a) + tf.lgamma(alpha) \
        + tf.lgamma(z_over_a + 1)
    return K.mean(loss)
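# For integer counts, the per-element expression above is the negative log-pmf
# of a negative binomial (Gamma-Poisson mixture) with r = alpha and success
# probability p = beta / (1 + beta); that appears to be what gamma_loss
# averages. A standalone SciPy sketch of the correspondence (the values below
# are arbitrary, and k plays the role of z_over_a):
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

k, alpha, beta = 3.0, 2.5, 0.7

per_element_loss = (-alpha * np.log(beta) + (alpha + k) * np.log(1 + beta)
                    - gammaln(alpha + k) + gammaln(alpha) + gammaln(k + 1))

assert np.isclose(per_element_loss,
                  -nbinom.logpmf(k, n=alpha, p=beta / (1 + beta)))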