def rvs(self, x, n_curves=1, n_samples=1, T=None):
    """Draw conversion outcomes and conversion times from the posterior.

    Parameters
    ----------
    x : array-like
        1-D feature vector.
    n_curves : int
        Number of posterior parameter draws (one random draw per curve).
    n_samples : int
        Number of samples drawn per curve.
    T : numpy array of shape (n_curves, n_samples), optional
        If given, each sample is conditioned on having observed
        non-conversion up until time ``T[i, j]``.

    Returns
    -------
    B : bool array of shape (n_curves, n_samples)
        Whether each sample eventually converts.
    C : float array of shape (n_curves, n_samples)
        Conversion time for each sample; 0 where the sample never
        converts (i.e. where ``B`` is False).
    """
    assert self._ci  # Posterior samples only exist after fitting with MCMC
    if T is None:
        T = numpy.zeros((n_curves, n_samples))
    else:
        assert T.shape == (n_curves, n_samples)
    # `numpy.bool` was a deprecated alias removed in NumPy 1.24 — the
    # builtin `bool` is the correct dtype here.
    B = numpy.zeros((n_curves, n_samples), dtype=bool)
    C = numpy.zeros((n_curves, n_samples))
    params = self.params['samples']
    # Pick one random posterior draw per curve.
    for i, j in enumerate(
            numpy.random.randint(len(params['k']), size=n_curves)):
        k = params['k'][j]
        p = params['p'][j]
        lambd = exp(dot(x, params['alpha'][j]) + params['a'][j])
        c = expit(dot(x, params['beta'][j]) + params['b'][j])
        z = numpy.random.uniform(size=(n_samples,))
        # CDF of conversion at the already-observed time T[i].
        # NOTE(review): `lambd` is scalar for a 1-D `x`, so the `outer`
        # looks redundant — presumably kept for array-valued lambd; harmless.
        cdf_now = c * gammainc(
            k, numpy.multiply.outer(T[i], lambd)**p)
        # Rescale uniform draws to condition on survival past T.
        adjusted_z = cdf_now + (1 - cdf_now) * z
        B[i] = (adjusted_z < c)  # z beyond c means "never converts"
        y = adjusted_z / c
        w = gammaincinv(k, y)
        # Invert the CDF: w = (t * lambd)**p  =>  t = w**(1/p) / lambd
        C[i] = w**(1. / p) / lambd
        C[i][~B[i]] = 0  # overwrite the NaNs produced for non-converters
    return B, C
def generalized_gamma_LL(x, X, B, T, W, fix_k, fix_p,
                         hierarchical, callback=None):
    """Log-likelihood of the generalized-gamma conversion model.

    Parameters
    ----------
    x : packed parameter vector
        ``[log k, log p, log_sigma_alpha, log_sigma_beta, a, b,
        alpha (n_features), beta (n_features)]``.
    X : feature matrix, one row per observation.
    B : per-observation conversion indicator (1 = converted / observed).
    T : per-observation time (conversion time or censoring time).
    W : per-observation weight.
    fix_k, fix_p : if not None, use these shape values instead of
        reading (the log of) them from ``x``.
    hierarchical : if True, add log-prior terms for alpha/beta with
        sigma ~ invgamma(1, 1).
    callback : optional; invoked with the final log-likelihood.

    Returns the total log-likelihood, or ``-inf`` if it is NaN.
    """
    k = fix_k if fix_k is not None else exp(x[0])
    p = fix_p if fix_p is not None else exp(x[1])
    log_sigma_alpha, log_sigma_beta = x[2], x[3]
    a, b = x[4], x[5]
    n_features = (len(x) - 6) // 2
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    lambd = exp(dot(X, alpha) + a)  # per-observation rate
    c = expit(dot(X, beta) + b)     # per-observation conversion probability
    # log PDF of the generalized gamma:
    # p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(t*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
        + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T * lambd)**p)
    LL_observed = log(c) + log_pdf           # converted at time T
    LL_censored = log((1 - c) + c * (1 - cdf))  # never converts, or not yet
    LL_data = sum(W * B * LL_observed + W * (1 - B) * LL_censored, 0)
    if not hierarchical:
        LL = LL_data
    else:
        # Hierarchical model: sigma_alpha, sigma_beta ~ invgamma(1, 1)
        sigma_alpha_sq = exp(log_sigma_alpha)**2
        sigma_beta_sq = exp(log_sigma_beta)**2
        LL_prior_a = -4*log_sigma_alpha - 1/sigma_alpha_sq \
            - dot(alpha, alpha) / (2*sigma_alpha_sq) \
            - n_features*log_sigma_alpha
        LL_prior_b = -4*log_sigma_beta - 1/sigma_beta_sq \
            - dot(beta, beta) / (2*sigma_beta_sq) \
            - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    if isnan(LL):
        return -numpy.inf
    if callback is not None:
        callback(LL)
    return LL
def cdf(self, x, t, ci=None):
    """Evaluate P(converted by time t) for feature vector x.

    With ``ci=None`` the MAP point estimate is used. Otherwise the
    posterior samples are used and the last axis of the result holds
    ``(mean, lower, upper)`` at the given credible-interval level.
    """
    x = numpy.array(x)
    t = numpy.array(t)
    if ci is None:
        params = self.params['map']
    else:
        assert self._ci  # CI requested — requires MCMC posterior samples
        params = self.params['samples']
    rate = exp(dot(x, params['alpha'].T) + params['a'])
    conv = expit(dot(x, params['beta'].T) + params['b'])
    prob = conv * gammainc(
        params['k'],
        numpy.multiply.outer(t, rate)**params['p'])
    if not ci:
        return prob
    # Collapse the trailing sample axis into a (mean, lo, hi) triple.
    center = numpy.mean(prob, axis=-1)
    lower = numpy.percentile(prob, (1 - ci) * 50, axis=-1)
    upper = numpy.percentile(prob, (1 + ci) * 50, axis=-1)
    return numpy.stack((center, lower, upper), axis=-1)