def _cumulative_hazard(self, params, T, Xs): c = expit(np.dot(Xs["beta_"], params["beta_"])) lambda_ = np.exp(np.dot(Xs["lambda_"], params["lambda_"])) rho_ = np.exp(np.dot(Xs["rho_"], params["rho_"])) survival = np.exp(-((T / lambda_)**rho_)) return -np.log((1 - c) * 1.0 + c * survival)
def rvs(self, x, n_curves=1, n_samples=1, T=None):
    """Sample conversion outcomes and times from the fitted posterior.

    For each of ``n_curves`` curves one posterior draw is picked at random
    from ``self.params['samples']``; ``n_samples`` outcomes are then drawn
    from that curve by inverting its CDF.

    :param x: feature vector
    :param n_curves: number of posterior draws (rows of the output)
    :param n_samples: number of samples per draw (columns of the output)
    :param T: optional array of shape ``(n_curves, n_samples)``; when
        given, sampling is conditioned on no conversion having been
        observed up to time ``T``
    :returns: tuple ``(B, C)`` of arrays with shape
        ``(n_curves, n_samples)``: ``B`` is a boolean conversion flag and
        ``C`` the conversion time (0 where ``B`` is false)
    :raises AssertionError: if the model was not fit with MCMC samples or
        ``T`` has the wrong shape
    """
    assert self._ci  # Need to be fit with MCMC
    shape = (n_curves, n_samples)
    if T is None:
        T = numpy.zeros(shape)
    else:
        assert T.shape == shape
    # The ``numpy.bool`` alias was removed in NumPy 1.24; the builtin
    # ``bool`` is the portable spelling of the same dtype.
    B = numpy.zeros(shape, dtype=bool)
    C = numpy.zeros(shape)
    params = self.params['samples']
    draws = numpy.random.randint(len(params['k']), size=n_curves)
    for i, j in enumerate(draws):
        k = params['k'][j]
        p = params['p'][j]
        lambd = exp(dot(x, params['alpha'][j]) + params['a'][j])
        c = expit(dot(x, params['beta'][j]) + params['b'][j])
        z = numpy.random.uniform(size=(n_samples, ))
        # CDF already "used up" by the observed non-conversion until T[i].
        cdf_now = c * gammainc(
            k, numpy.multiply.outer(T[i], lambd)**p)
        # Map the uniform draw onto the remaining probability mass.
        adjusted_z = cdf_now + (1 - cdf_now) * z
        B[i] = (adjusted_z < c)
        y = adjusted_z / c
        # Invert the incomplete gamma: w = (t * lambd)**p.  Entries with
        # y > 1 are non-conversions and are zeroed out below.
        w = gammaincinv(k, y)
        C[i] = w**(1. / p) / lambd
        C[i][~B[i]] = 0
    return B, C
def generalized_gamma_loss(x, X, B, T, W, fix_k, fix_p,
                           hierarchical, flavor, callback=None):
    """Return the (penalised) log-likelihood of the generalized gamma model.

    The parameter vector ``x`` is laid out as
    ``[log k, log p, log sigma_alpha, log sigma_beta, a, b, alpha..., beta...]``
    where ``alpha`` and ``beta`` each hold one coefficient per feature.

    :param x: flat parameter vector (see layout above)
    :param X: feature matrix, one row per observation
    :param B: per-observation indicator, 1 for observed and 0 for censored
    :param T: per-observation time
    :param W: per-observation weight
    :param fix_k: if not ``None``, use this value of ``k`` instead of ``exp(x[0])``
    :param fix_p: if not ``None``, use this value of ``p`` instead of ``exp(x[1])``
    :param hierarchical: if true, add the prior terms on ``alpha``/``beta``
        and their sigmas
    :param flavor: ``'logistic'`` (sigmoid link) or ``'linear'`` (L2 loss)
    :param callback: optional callable invoked with the final value
    :returns: the log-likelihood, or ``-inf`` when it evaluates to NaN
    """
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x) - 6) / 2)
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    lambd = exp(dot(X, alpha) + a)

    # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(x*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
        + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T*lambd)**p)

    if flavor == 'logistic':  # Log-likelihood with sigmoid
        c = expit(dot(X, beta) + b)
        LL_observed = log(c) + log_pdf
        LL_censored = log((1 - c) + c * (1 - cdf))
    elif flavor == 'linear':  # L2 loss, linear
        c = dot(X, beta) + b
        LL_observed = -(1 - c)**2 + log_pdf
        LL_censored = -(c*cdf)**2

    # Data term only: the sigma penalty belongs to the prior below, not here.
    LL_data = sum(
        W * B * LL_observed +
        W * (1 - B) * LL_censored, 0)

    if hierarchical:
        # Hierarchical model with sigmas ~ invgamma(1, 1)
        LL_prior_a = -4*log_sigma_alpha - 1/exp(log_sigma_alpha)**2 \
            - dot(alpha, alpha) / (2*exp(log_sigma_alpha)**2) \
            - n_features*log_sigma_alpha
        LL_prior_b = -4*log_sigma_beta - 1/exp(log_sigma_beta)**2 \
            - dot(beta, beta) / (2*exp(log_sigma_beta)**2) \
            - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    else:
        LL = LL_data

    # A NaN objective is reported as -inf and does not reach the callback.
    if isnan(LL):
        return -numpy.inf
    if callback is not None:
        callback(LL)
    return LL
def callback(combined_params, t, combined_gradient):
    """Record temperature/eta history and refresh the plots every 10 steps.

    ``combined_params`` unpacks into ``(params, (log_temperature, log_eta))``
    and ``combined_gradient`` into a matching pair; only the gradient of
    ``params`` is plotted.  Relies on ``temperatures``, ``etas``,
    ``ax1``..``ax4``, ``plt`` and ``mc_objective_and_var`` from the
    enclosing scope.
    """
    params, est_params = combined_params
    grad_params, _ = combined_gradient
    log_temperature, log_eta = est_params

    # History is appended on every iteration, not only when plotting.
    temperatures.append(np.exp(log_temperature))
    etas.append(np.exp(log_eta))

    if t % 10 != 0:
        return

    objective_val, _grads, _est_grads = mc_objective_and_var(
        combined_params, t)
    print("Iteration {} objective {}".format(t, np.mean(objective_val)))

    # Panel 1: parameters squashed to (0, 1).
    ax1.cla()
    ax1.plot(expit(params), 'r')
    ax1.set_ylabel('parameter values')
    ax1.set_ylim([0, 1])

    # Panel 2: gradient with respect to the parameters.
    ax2.cla()
    ax2.plot(grad_params, 'g')
    ax2.set_ylabel('average gradient')

    # Panel 3: temperature history.
    ax3.cla()
    ax3.plot(temperatures, 'b')
    ax3.set_ylabel('temperature')

    # Panel 4: eta history.
    ax4.cla()
    ax4.plot(etas, 'b')
    ax4.set_ylabel('eta')
    ax4.set_xlabel('iteration')

    plt.draw()
    plt.pause(1.0/30.0)
def cdf_posteriori(self, x, t, ci=None): '''Returns the value of the cumulative distribution function for a fitted model. :param x: feature vector (or matrix) :param t: time :param ci: if this is provided, and the model was fit with `ci = True`, then the return value will be the trace samples generated via the MCMC steps. If this is not provided, then the max a posteriori prediction will be used. ''' x = numpy.array(x) t = numpy.array(t) if ci is None: params = self.params["map"] else: assert self._ci params = self.params["samples"] t = numpy.expand_dims(t, -1) lambd = exp(dot(x, params["alpha"].T) + params["a"]) if self._flavor == "logistic": c = expit(dot(x, params["beta"].T) + params["b"]) elif self._flavor == "linear": c = dot(x, params["beta"].T) + params["b"] M = c * gammainc(params["k"], (t * lambd) ** params["p"]) return M
def _cumulative_hazard(self, params, T, Xs): c = expit(np.dot(Xs["beta_"], params["beta_"])) lambda_ = np.exp(np.dot(Xs["lambda_"], params["lambda_"])) rho_ = np.exp(np.dot(Xs["rho_"], params["rho_"])) cdf = 1 - np.exp(-(T / lambda_) ** rho_) return -np.log((1 - c) + c * (1 - cdf))
def logistic_normal_integral_approx(mu, var):
    """
    Approximate E[logit^{-1}(X)] for X ~ N(mu, var).

    The mean is shrunk by ``sqrt(1 + pi * var / 8)`` before being passed
    through the logistic function.
    """
    shrinkage = np.sqrt(1 + (np.pi * (var / 8)))
    shrunk_mean = mu / shrinkage
    return expit(shrunk_mean)
def _predict(self, params, x, t): lambd = exp(dot(x, params['alpha'].T) + params['a']) if self._flavor == 'logistic': c = expit(dot(x, params['beta'].T) + params['b']) elif self._flavor == 'linear': c = dot(x, params['beta'].T) + params['b'] M = c * gammainc(params['k'], (t * lambd)**params['p']) return M
def generalized_gamma_loss(x, X, B, T, W, fix_k, fix_p,
                           hierarchical, flavor, callback=None):
    """Return the (penalised) log-likelihood of the generalized gamma model.

    The parameter vector ``x`` is laid out as
    ``[log k, log p, log sigma_alpha, log sigma_beta, a, b, alpha..., beta...]``
    where ``alpha`` and ``beta`` each hold one coefficient per feature.

    :param x: flat parameter vector (see layout above)
    :param X: feature matrix, one row per observation
    :param B: per-observation indicator, 1 for observed and 0 for censored
    :param T: per-observation time
    :param W: per-observation weight
    :param fix_k: if not ``None``, use this value of ``k`` instead of ``exp(x[0])``
    :param fix_p: if not ``None``, use this value of ``p`` instead of ``exp(x[1])``
    :param hierarchical: if true, add the prior terms on ``alpha``/``beta``
        and their sigmas
    :param flavor: ``'logistic'`` (sigmoid link) or ``'linear'`` (L2 loss)
    :param callback: optional callable invoked with the final value
    :returns: the log-likelihood, or ``-inf`` when it evaluates to NaN
    :raises ValueError: if ``flavor`` is not one of the supported values
    """
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x) - 6) / 2)
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    lambd = exp(dot(X, alpha) + a)

    # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(x*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
        + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T * lambd)**p)

    if flavor == 'logistic':  # Log-likelihood with sigmoid
        c = expit(dot(X, beta) + b)
        LL_observed = log(c) + log_pdf
        LL_censored = log((1 - c) + c * (1 - cdf))
    elif flavor == 'linear':  # L2 loss, linear
        c = dot(X, beta) + b
        LL_observed = -(1 - c)**2 + log_pdf
        LL_censored = -(c * cdf)**2
    else:
        # Previously this fell through to an UnboundLocalError below.
        raise ValueError('unknown flavor: %r' % (flavor,))

    LL_data = sum(
        W * B * LL_observed +
        W * (1 - B) * LL_censored, 0)

    if hierarchical:
        # Hierarchical model with sigmas ~ invgamma(1, 1)
        LL_prior_a = -4*log_sigma_alpha - 1/exp(log_sigma_alpha)**2 \
            - dot(alpha, alpha) / (2*exp(log_sigma_alpha)**2) \
            - n_features*log_sigma_alpha
        LL_prior_b = -4*log_sigma_beta - 1/exp(log_sigma_beta)**2 \
            - dot(beta, beta) / (2*exp(log_sigma_beta)**2) \
            - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    else:
        LL = LL_data

    # A NaN objective is reported as -inf and does not reach the callback.
    if isnan(LL):
        return -numpy.inf
    if callback is not None:
        callback(LL)
    return LL
def sim(self, s):
    """Simulate first-spike times for every entry of ``s``.

    For each step ``u`` in ``1 .. t_max - 1`` a Bernoulli draw is made with
    probability ``expit(exp(s) * constant[u, :] - v_thresh)``; the first
    step with a successful draw is recorded.  Entries that never fire keep
    the sentinel value ``t_max + 1``.

    :param s: sequence of log-gains
    :returns: ``int32`` array of spike steps
    """
    horizon = self.t_max
    gain = np.exp(s)
    spike_times = np.ones(len(s)) * horizon + 1
    for step in range(1, horizon):
        drive = np.multiply(gain, self.constant[step, :])
        fire_prob = expit(drive - self.v_thresh)
        fired = np.random.binomial(1, fire_prob)
        # Only entries still holding the sentinel may record a spike.
        not_yet_spiked = spike_times > horizon
        spike_times = np.where(fired & not_yet_spiked, step, spike_times)
    return spike_times.astype(np.int32)
def log_like(self, s, t):
    """
    Log likelihood of spike timings under a per-step Bernoulli model.

    The per-step firing probability is
    ``p = expit(exp(s)[:, None] * const_mat - v_thresh)``.  Each row's
    likelihood starts as the sum of ``log(1 - p)`` over all steps; rows
    with ``t < t_max`` additionally get ``log(p) - log(1 - p)`` evaluated
    at ``self.t_idx``.

    Args:
        s: estimated (log) gain of stimulation, one entry per row
        t: spike timings, one entry per row

    Returns:
        Array of per-row log likelihoods with NaN/inf replaced by
        ``np.nan_to_num``.
    """
    drive = np.einsum('i,ij->ij', np.exp(s), self.const_mat)
    fire_prob = expit(drive - self.v_thresh)
    silent_ll = np.sum(np.log(1 - fire_prob), 1)
    prob_at_spike = fire_prob[self.t_idx]
    spike_correction = -np.log(1 - prob_at_spike) + np.log(prob_at_spike)
    spiked = t < self.t_max
    total = silent_ll + np.multiply(spiked, spike_correction)
    return np.nan_to_num(total)
def callback(params, t, gradient):
    """Refresh the three diagnostic plots every 10 iterations.

    Relies on ``D``, ``ax1``..``ax3``, ``plt`` and
    ``mc_objective_and_var`` from the enclosing scope.
    """
    # Only the first D gradient entries are plotted.
    grad_params = gradient[:D]
    if t % 10 != 0:
        return

    objective_val, _grads, grad_vars = mc_objective_and_var(params, t)
    print("Iteration {} objective {}".format(t, objective_val))

    # Panel 1: parameters squashed to (0, 1).
    ax1.cla()
    ax1.plot(expit(params), 'r')
    ax1.set_ylabel('parameter values')
    ax1.set_ylim([0, 1])

    # Panel 2: gradient with respect to the parameters.
    ax2.cla()
    ax2.plot(grad_params, 'g')
    ax2.set_ylabel('average gradient')

    # Panel 3: variance of the gradient estimate.
    ax3.cla()
    ax3.plot(grad_vars, 'b')
    ax3.set_ylabel('gradient variance')
    ax3.set_xlabel('parameter index')

    plt.draw()
    plt.pause(1.0 / 30.0)
def cdf(self, x, t, ci=None):
    """Evaluate the fitted cumulative distribution function.

    :param x: feature vector (or matrix)
    :param t: time
    :param ci: when ``None`` the ``'map'`` parameters are used and the
        plain value is returned; otherwise the model must have been fit
        with samples, and the last axis of the result holds the mean and
        the lower/upper percentile bounds of the ``ci`` interval.
    """
    features = numpy.array(x)
    times = numpy.array(t)
    use_samples = ci is not None
    if use_samples:
        assert self._ci
    params = self.params['samples' if use_samples else 'map']

    rate = exp(dot(features, params['alpha'].T) + params['a'])
    conversion = expit(dot(features, params['beta'].T) + params['b'])
    scaled_time = numpy.multiply.outer(times, rate)**params['p']
    curve = conversion * gammainc(params['k'], scaled_time)

    if not ci:
        return curve

    # Replace the last axis with a 3-element vector
    lower_pct = (1 - ci) * 50
    upper_pct = (1 + ci) * 50
    summary = (
        numpy.mean(curve, axis=-1),
        numpy.percentile(curve, lower_pct, axis=-1),
        numpy.percentile(curve, upper_pct, axis=-1),
    )
    return numpy.stack(summary, axis=-1)
def cdf(self, x, t, ci=None):
    '''Evaluates the cumulative distribution function of a fitted model.

    TODO: this should probably be renamed "predict" in the future
    to follow the scikit-learn convention.

    :param x: feature vector (or matrix)
    :param t: time
    :param ci: when ``None``, the max a posteriori parameters are used
        and the plain prediction is returned.  Otherwise the model must
        have been fit with `ci = True`; the return value then has one
        more dimension of size 3 holding the mean, the lower bound and
        the upper bound of the confidence interval.
    '''
    features = numpy.array(x)
    times = numpy.array(t)
    if ci is not None:
        assert self._ci
        params = self.params['samples']
        # Extra trailing axis so times broadcast against the samples.
        times = numpy.expand_dims(times, -1)
    else:
        params = self.params['map']

    rate = exp(dot(features, params['alpha'].T) + params['a'])
    if self._flavor == 'logistic':
        c = expit(dot(features, params['beta'].T) + params['b'])
    elif self._flavor == 'linear':
        c = dot(features, params['beta'].T) + params['b']
    curve = c * gammainc(params['k'], (times*rate)**params['p'])

    if not ci:
        return curve

    # Replace the last axis with a 3-element vector
    lower_pct = (1-ci)*50
    upper_pct = (1+ci)*50
    summary = (
        numpy.mean(curve, axis=-1),
        numpy.percentile(curve, lower_pct, axis=-1),
        numpy.percentile(curve, upper_pct, axis=-1),
    )
    return numpy.stack(summary, axis=-1)
def generalized_gamma_LL(x, X, B, T, W, fix_k, fix_p, hierarchical):
    """Return the (penalised) log-likelihood of the generalized gamma model.

    The parameter vector ``x`` is laid out as
    ``[log k, log p, log sigma_alpha, log sigma_beta, a, b, alpha..., beta...]``
    where ``alpha`` and ``beta`` each hold one coefficient per feature.

    :param x: flat parameter vector (see layout above)
    :param X: feature matrix, one row per observation
    :param B: per-observation indicator, 1 for observed and 0 for censored
    :param T: per-observation time
    :param W: per-observation weight
    :param fix_k: if not ``None``, use this value of ``k`` instead of ``exp(x[0])``
    :param fix_p: if not ``None``, use this value of ``p`` instead of ``exp(x[1])``
    :param hierarchical: if true, add the prior terms on ``alpha``/``beta``
        and their sigmas
    :returns: the log-likelihood, or ``-inf`` when it evaluates to NaN
    """
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x) - 6) / 2)
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    lambd = exp(dot(X, alpha) + a)
    c = expit(dot(X, beta) + b)

    # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(x*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
        + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T * lambd)**p)

    LL_observed = log(c) + log_pdf
    LL_censored = log((1 - c) + c * (1 - cdf))

    LL_data = sum(
        W * B * LL_observed +
        W * (1 - B) * LL_censored, 0)

    if hierarchical:
        # Hierarchical model with sigmas ~ invgamma(1, 1)
        LL_prior_a = -4*log_sigma_alpha - 1/exp(log_sigma_alpha)**2 \
            - dot(alpha, alpha) / (2*exp(log_sigma_alpha)**2) \
            - n_features*log_sigma_alpha
        # The Gaussian term divides by 2*sigma_beta**2, mirroring the alpha
        # prior above (it previously read 2**exp(log_sigma_beta**2)).
        LL_prior_b = -4*log_sigma_beta - 1/exp(log_sigma_beta)**2 \
            - dot(beta, beta) / (2*exp(log_sigma_beta)**2) \
            - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    else:
        LL = LL_data

    # A NaN objective is reported as -inf.
    if isnan(LL):
        return -numpy.inf
    return LL
def callback(combined_params, t, combined_gradient):
    """Record the temperature and refresh five diagnostic plots every 10 steps.

    ``combined_params`` unpacks into ``(params, (log_temperature, nn_params))``
    and ``combined_gradient`` into a matching pair; only the gradient of
    ``params`` is plotted.  Relies on ``temperatures``, ``ax1``..``ax5``,
    ``plt``, ``mc_objective_and_var``, ``nn_predict``, ``map_and_stack``,
    ``make_one_d`` and ``slice_dim`` from the enclosing scope.
    """
    params, est_params = combined_params
    grad_params, _ = combined_gradient
    log_temperature, nn_params = est_params

    # History is appended on every iteration, not only when plotting.
    temperatures.append(np.exp(log_temperature))

    if t % 10 != 0:
        return

    objective_val, grads, _est_grads = mc_objective_and_var(
        combined_params, t)
    print("Iteration {} objective {}".format(t, np.mean(objective_val)))

    # Panel 1: parameters squashed to (0, 1).
    ax1.cla()
    ax1.plot(expit(params), 'r')
    ax1.set_ylabel('parameter values')
    ax1.set_xlabel('parameter index')
    ax1.set_ylim([0, 1])

    # Panel 2: gradient with respect to the parameters.
    ax2.cla()
    ax2.plot(grad_params, 'g')
    ax2.set_ylabel('average gradient')
    ax2.set_xlabel('parameter index')

    # Panel 3: variance of the sampled gradients.
    # NOTE(review): np.var without an axis reduces to a single number,
    # yet the x-axis is labelled per parameter -- confirm intent.
    ax3.cla()
    ax3.plot(np.var(grads), 'b')
    ax3.set_ylabel('gradient variance')
    ax3.set_xlabel('parameter index')

    # Panel 4: temperature history.
    ax4.cla()
    ax4.plot(temperatures, 'b')
    ax4.set_ylabel('temperature')
    ax4.set_xlabel('iteration')

    # Panel 5: one-dimensional slice of the surrogate network.
    ax5.cla()
    grid = np.linspace(0, 1, 200)

    def f_tilde(x):
        return nn_predict(nn_params, x)

    f_tilde_map = map_and_stack(make_one_d(f_tilde, slice_dim, params))
    ax5.plot(grid, f_tilde_map(logit(grid)), 'b')
    ax5.set_ylabel('1d slide of surrogate')
    ax5.set_xlabel('relaxed sample')

    plt.draw()
    plt.pause(1.0 / 30.0)
def callback(combined_params, t, combined_grads):
    """Record the temperature and refresh four diagnostic plots every 10 steps.

    ``combined_params`` unpacks into ``(params, temperature)`` and
    ``combined_grads`` into a matching pair; only the gradient of
    ``params`` is plotted.  Relies on ``temperatures``, ``ax1``..``ax4``,
    ``plt`` and ``mc_objective_and_var`` from the enclosing scope.
    """
    params, temperature = combined_params
    grad_params, _ = combined_grads

    # History is appended on every iteration, not only when plotting.
    temperatures.append(temperature)

    if t % 10 != 0:
        return

    objective_val, grad_vars = mc_objective_and_var(combined_params, t)
    print("Iteration {} objective {}".format(t, objective_val))

    # Panel 1: parameters squashed to (0, 1).
    ax1.cla()
    ax1.plot(expit(params), 'r')
    ax1.set_ylabel('parameter values')
    ax1.set_ylim([0, 1])

    # Panel 2: gradient with respect to the parameters.
    ax2.cla()
    ax2.plot(grad_params, 'g')
    ax2.set_ylabel('average gradient')

    # Panel 3: variance of the gradient estimate.
    ax3.cla()
    ax3.plot(grad_vars, 'b')
    ax3.set_ylabel('gradient variance')
    ax3.set_xlabel('parameter index')

    # Panel 4: temperature history.
    ax4.cla()
    ax4.plot(temperatures, 'b')
    ax4.set_ylabel('temperature')

    plt.draw()
    plt.pause(1.0 / 30.0)
_gamma_from_natural_parameters, stats.gamma, _gamma_log_normalizer) # Beta. # TODO(mhoffman): log1p(negative(1)) yields a divide-by-zero. # TODO(mhoffman): Write rule to transform log(1 - x) into log1p(negative(x)). _add_distribution(SupportTypes.UNIT_INTERVAL, ['log(x)', 'log1p(negative(x))'], _beta_from_natural_parameters, stats.beta, _beta_log_normalizer) # Dirichlet. _add_distribution(SupportTypes.SIMPLEX, ['log(x)'], lambda alpha_minus_1: (alpha_minus_1 + 1, ), batch_dirichlet, _dirichlet_log_normalizer) # Bernoulli. # TODO(mhoffman): A more numerically stable softplus would be preferable. _add_distribution(SupportTypes.BINARY, ['x'], lambda logit_prob: (special.expit(logit_prob), ), stats.bernoulli, lambda logit_prob: np.sum(np.log1p(np.exp(logit_prob)))) # Categorical. def _softmax(x): safe_x = x - x.max(-1, keepdims=True) p = np.exp(safe_x) return p / p.sum(-1, keepdims=True) _add_distribution(SupportTypes.INTEGER, ['one_hot(x)'], lambda logit_probs: (_softmax(logit_probs), ), ph.categorical, lambda logit_probs: np.sum(misc.logsumexp(logit_probs, -1))) # Multinoulli. _add_distribution(SupportTypes.ONE_HOT, ['x'], lambda logit_probs:
def d_net_dx(self, x, k):
    """Return ``(W[1]^T . (W[0]^T ** k)) . sigma'(x)``.

    ``sigma'(x) = expit(x) * (1 - expit(x))`` is the logistic derivative
    and ``**`` is an element-wise power.  Presumably the ``k``-th order
    input derivative of the two-layer net -- confirm against the caller.
    """
    sigmoid = expit(x)
    sigmoid_slope = sigmoid * (1 - sigmoid)
    weight_chain = np.dot(self.W[1].T, self.W[0].T**k)
    return np.dot(weight_chain, sigmoid_slope)
def run_net(self, x):
    """Forward pass: ``expit(x_col . W[0]) . W[1]``.

    ``x`` is reshaped to a ``(10, 1)`` column, so it must contain exactly
    ten elements.
    """
    column = x.reshape(10, 1)
    pre_activation = np.dot(column, self.W[0])
    return np.dot(expit(pre_activation), self.W[1])
not_found_normalizer = asserts_false ### Bernoulli distribution BernoulliSuffStat = namedtuple('BernoulliSuffStat', ['x']) x_matcher = make_matcher( pattern=EnvLookup('x'), preds=(), update_suffstat=( lambda suffstat, bindings, node: suffstat._replace(**{'x': node}))) bernoulli_matchers = frozenset([x_matcher]) bernoulli_check = lambda suffstat: not isinstance(suffstat.x, dict) bernoulli_log_normalizer = ( lambda natparam: np.sum(np.log1p(np.exp(natparam.x)))) bernoulli_distbn = ( lambda natparam: stats.bernoulli(special.expit(natparam.x))) bernoulli_defn = DistributionDefinition( matchers=bernoulli_matchers, support=SupportTypes.BINARY, check=bernoulli_check, suffstat_cls=BernoulliSuffStat, make_log_normalizer=lambda *args: bernoulli_log_normalizer, distribution=bernoulli_distbn) exp_family_stats.append(BernoulliSuffStat) distbn_defns.append(bernoulli_defn) ### Gamma distribution GammaSuffStat = namedtuple('GammaSuffStat', ['log_x', 'x']) log_x_matcher = make_matcher(
def function(self, x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` to ``x``."""
    activation = expit(x)
    return activation
def softmax(z, log_temperature):
    """Logistic sigmoid of ``z`` divided by the temperature.

    The temperature is supplied in log space and exponentiated here.
    """
    return expit(z / np.exp(log_temperature))
def relaxed_bernoulli_sample(logit_theta, noise, log_temperature):
    """Draw a relaxed Bernoulli sample.

    ``logit_theta`` is mapped to a probability with ``expit``, handed to
    ``logistic_sample`` together with ``noise``, and the result is squashed
    by ``softmax`` at the given log temperature.
    """
    theta = expit(logit_theta)
    logistic_draw = logistic_sample(noise, theta)
    return softmax(logistic_draw, log_temperature)
def conditional_noise(logit_theta, samples, noise):
    """Compute the conditional noise p(u|b).

    Here b = H(z), z = logit_theta + logit(noise) and p(u) = U(0, 1).
    Where ``samples`` is 1 the noise is rescaled into ``[u', 1]``; where it
    is 0 the noise is rescaled into ``[0, u']``, with ``u' = 1 - theta``.
    """
    uprime = expit(-logit_theta)  # u' = 1 - theta
    upper_branch = noise * (1 - uprime) + uprime
    complement = 1 - samples
    return samples * upper_branch + complement * noise * uprime
def expected_objective(params):
    """Exact expectation of ``objective`` over all ``2**D`` binary vectors.

    Each configuration ``b`` is weighted by the product over ``i`` of
    ``expit(params[i])`` where ``b[i]`` is 1 and ``expit(-params[i])``
    where it is 0.  Relies on ``objective`` and ``D`` from the enclosing
    scope.
    """
    configurations = list(itertools.product([0.0, 1.0], repeat=D))
    total = 0
    for bits in configurations:
        value = objective(np.array(bits))
        # (bit * 2 - 1) maps {0, 1} to {-1, +1}, selecting the sign of the logit.
        probability = np.prod(
            [expit(params[i] * (bits[i] * 2.0 - 1.0)) for i in range(D)])
        total = total + value * probability
    return total