def conditional(self, name, Xnew, **kwargs):
    r"""
    Return the conditional distribution evaluated at new inputs `Xnew`.

    The conditional is built from the inputs `self.X` and the function
    values `self.f` (the values the TP prior was placed over) and is
    returned as a multivariate Student-T random variable.

    Parameters
    ----------
    name : string
        Name of the random variable.
    Xnew : array-like
        Function input values.
    **kwargs
        Extra keyword arguments that are passed to the `MvStudentT`
        distribution constructor. A ``shape`` entry, if present, is
        removed and handed to `infer_shape` instead.
    """
    nu2, mu, cov = self._build_conditional(Xnew, self.X, self.f)
    # Pop "shape" so it is not forwarded a second time through **kwargs.
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, shape=shape, **kwargs)
def __init__(self, data=None, x=None, y=None, ndim=None,
             mu_prior=[0.0, 1000.], sigma_prior=200.):
    """
    Build a multivariate Student-T model of the supplied observations.

    Either ``data`` or both ``x`` and ``y`` must be provided.

    Parameters
    ----------
    data : array-like with a ``shape`` attribute, optional
        Observations; ``data.shape[1]`` is taken as the number of features.
    x, y : array-like, optional
        Used only when ``data`` is None; stacked column-wise into a
        two-column data array.
    ndim : int, optional
        Expected number of features. Only consulted when ``data`` is given,
        in which case it must equal ``data.shape[1]``.
    mu_prior : sequence of two floats
        ``mu`` and ``sigma`` of the Normal prior placed on the mean vector.
        The default list is only indexed, never mutated.
    sigma_prior : float
        Parameter of the HalfCauchy distribution used as ``sd_dist`` for
        the LKJ Cholesky covariance prior.

    Raises
    ------
    ValueError
        If ``data`` is None and ``x`` or ``y`` is missing, or if ``ndim``
        disagrees with ``data.shape[1]``.
    """
    self.fitted = False
    self.plot_trace_vars = ['mu', "nu", "chol_corr"]

    if data is None:
        # Both coordinates are required; previously a single missing one
        # slipped through and ``None`` was stacked into the data array.
        if x is None or y is None:
            raise ValueError("Either data must be given as input, or x and y")
        self.ndim = 2
        self.data = np.column_stack((x, y))
    else:
        self.ndim = data.shape[1] if ndim is None else ndim
        if self.ndim != data.shape[1]:
            raise ValueError("Data must have the same number of features and ndim")
        self.data = data

    self.model = pm.Model()
    with self.model:
        # Weakly informative priors on the means and standard deviations.
        mu = pm.Normal("mu", mu=mu_prior[0], sigma=mu_prior[1], shape=self.ndim)
        sd_dist = pm.HalfCauchy.dist(sigma_prior)
        # LKJ prior (eta=2) on the covariance, which weakly penalises
        # strong correlations; it yields the Cholesky factor directly.
        chol, _corr, _stds = pm.LKJCholeskyCov("chol", n=self.ndim, eta=2.0,
                                               sd_dist=sd_dist, compute_corr=True)
        # Record the full covariance matrix deterministically for completeness.
        pm.Deterministic("cov", chol.dot(chol.T))
        # Degrees of freedom: shifted exponential so that nu >= 1.
        nu_minus_one = pm.Exponential('nu-1', lam=1. / 29.)
        nu = pm.Deterministic('nu', nu_minus_one + 1)
        # Likelihood: the observed values follow a multivariate Student-T.
        pm.MvStudentT('vals', nu=nu, mu=mu, chol=chol, observed=self.data)
def analyze_robust(data):
    """
    Build a bivariate Student-T model for ``data`` and return it.

    The model has a two-component mean ``mu``, two standard deviations
    ``sigma``, a correlation ``r`` and degrees of freedom ``nu``; the
    transposed data are attached as observations. Nothing is sampled here.

    Parameters
    ----------
    data : array-like
        Observations; ``data.T`` is passed as the observed values.

    Returns
    -------
    The constructed ``pm.Model``.
    """
    with pm.Model() as model:
        # Priors. The start values are hard-coded constants (the original
        # notes describe them as the median, the MAD and Spearman's
        # correlation respectively).
        mu = pm.Normal(
            'mu', mu=0., tau=0.000001, shape=2,
            testval=np.array([-100, 100]),
        )
        sigma = pm.Uniform(
            'sigma', lower=0, upper=0.001, shape=2,
            testval=np.array([0.0001, 0.001]),
            transform=None,
        )
        rho = pm.Uniform(
            'r', lower=-1, upper=1,
            testval=-0.2144021,
            transform=None,
        )

        cov = pm.Deterministic('cov', covariance(sigma, rho))

        # Degrees of freedom: shifted exponential so that nu >= 1.
        nu_minus_one = pm.Exponential('nu_minus_one', lam=1. / 29., testval=1)
        nu = pm.Deterministic('nu', nu_minus_one + 1)

        pm.MvStudentT('mult_norm', nu=nu, mu=mu, Sigma=cov, observed=data.T)

    return model
def fit(self, X, y):
    """
    Fit the Student-t regressor to training data using MCMC.

    Stores the inputs, builds the PyMC model on ``self.model`` and keeps
    the sampled trace, minus the first ``self.burnin`` draws, on
    ``self.trace``.

    Parameters
    ----------
    X: np.ndarray, shape=(nsamples, nfeatures)
        Training instances.
    y: np.ndarray, shape=(nsamples,)
        Corresponding continuous target values to `X`.
    """
    self.X = X
    self.n = self.X.shape[0]
    self.y = y
    self.model = pm.Model()

    with self.model as model:
        length_scale = pm.Uniform('l', 0, 10)

        # Signal and noise terms are sampled on the log scale.
        log_signal = pm.Uniform('log_s2_f', lower=-7, upper=5)
        signal = pm.Deterministic('sigmaf', tt.exp(log_signal))
        log_noise = pm.Uniform('log_s2_n', lower=-7, upper=5)
        noise = pm.Deterministic('sigman', tt.exp(log_noise))

        # Look up the kernel that corresponds to this object's covariance
        # function by its class name.
        kernel_factory = covariance_equivalence[type(self.covfunc).__name__]
        f_cov = signal * kernel_factory(1, length_scale)
        Sigma = f_cov(self.X) + tt.eye(self.n) * noise ** 2

        pm.MvStudentT('y_obs', nu=self.nu, mu=np.zeros(self.n),
                      Sigma=Sigma, observed=self.y)

    with self.model as model:
        # Use the user-supplied step method if there is one, otherwise
        # fall back to the configured initialisation.
        if self.step is None:
            sampler_args = {'init': self.init}
        else:
            sampler_args = {'step': self.step()}
        self.trace = pm.sample(self.niter, **sampler_args)[self.burnin:]
def _build_prior(self, name, X, reparameterize=True, **kwargs):
    """
    Create the prior random variable over the inputs `X`.

    With ``reparameterize=True`` the variable is a deterministic
    combination of a chi-squared variable and a standard-normal vector
    rotated by the Cholesky factor of the covariance; otherwise it is an
    `MvStudentT` parameterised directly by that Cholesky factor.

    Parameters
    ----------
    name : string
        Name of the resulting random variable.
    X : array-like
        Function input values.
    reparameterize : bool
        Select the reparameterised construction (default) or the direct one.
    **kwargs
        Forwarded to the underlying distribution; a ``shape`` entry is
        removed and handed to `infer_shape`.
    """
    mean = self.mean_func(X)
    chol_factor = cholesky(stabilize(self.cov_func(X)))
    shape = infer_shape(X, kwargs.pop("shape", None))

    if not reparameterize:
        return pm.MvStudentT(name, nu=self.nu, mu=mean, chol=chol_factor,
                             shape=shape, **kwargs)

    # NOTE(review): the chi-squared variable has the fixed name "chi2_",
    # not derived from `name`; presumably a second prior built in the same
    # model would clash on it — confirm.
    chi2 = pm.ChiSquared("chi2_", self.nu)
    rotated = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs)
    scale = tt.sqrt(self.nu) / chi2
    return pm.Deterministic(name, scale * (mean + tt.dot(chol_factor, rotated)))
def fit_t(self, data, nu=5):
    """
    Fit a multivariate Student-T to ``data`` and return the MAP estimate.

    Parameters
    ----------
    data : array-like with a ``shape`` attribute
        Observations; ``data.shape[1]`` is the number of dimensions.
    nu : number
        Degrees of freedom of the Student-T likelihood (held fixed).

    Returns
    -------
    tuple
        The ``'mean'`` and ``'cov'`` entries of the MAP point.
    """
    n_dims = data.shape[1]

    with pm.Model() as model:
        # LKJ prior on the covariance, expressed through its Cholesky factor.
        sd_prior = pm.HalfCauchy.dist(2.5)
        packed = pm.LKJCholeskyCov('packed_L', n=n_dims, eta=2., sd_dist=sd_prior)
        chol = pm.expand_packed_triangular(n_dims, packed)
        # Registered so that 'cov' appears in the MAP result.
        pm.Deterministic('cov', chol.dot(chol.T))

        location = pm.Normal('mean', mu=0, sigma=10, shape=n_dims)
        pm.MvStudentT('obs', nu=nu, mu=location, chol=chol, observed=data)

    map_point = pm.find_MAP(model=model, progressbar=False)
    return map_point['mean'], map_point['cov']
def bayesian_correlation(vect1, vect2, robust=True):
    '''
    Sample the posterior of a bivariate distribution observing vect1 and
    vect2.

    With robust=True the likelihood is a multivariate Student-t; with
    robust=False it is a multivariate normal (the equivalent of pearsonr).
    The correlation is the variable named 'r' in the returned trace.
    '''

    def covariance(sigma, rho):
        # 2x2 correlation matrix (ones on the diagonal, rho elsewhere)
        # scaled on both sides by the diagonal matrix of sigmas.
        corr = t.fill_diagonal(t.alloc(rho, 2, 2), 1.)
        scale = t.diag(sigma)
        return scale.dot(corr).dot(scale)

    # Work with column vectors so the two series can be stacked side by side.
    vect1 = np.reshape(vect1, (-1, 1))
    vect2 = np.reshape(vect2, (-1, 1))

    with pm.Model() as multivariate:
        # priors
        sigma1 = pm.HalfCauchy('sigma1', 2 * np.std(vect1))
        sigma2 = pm.HalfCauchy('sigma2', 2 * np.std(vect2))
        # init with Spearman's correlation
        spearman_start = stats.spearmanr(vect1, vect2)[0]
        r = pm.Uniform('r', lower=-1, upper=1, testval=spearman_start)

        sigmas = t.stack((sigma1, sigma2), axis=0)
        cov = pm.Deterministic('covl', covariance(sigmas, r))

        mean1 = pm.Normal('μ1', mu=np.mean(vect1), sd=2 * np.std(vect1))
        mean2 = pm.Normal('μ2', mu=np.mean(vect2), sd=2 * np.std(vect2))
        means = [mean1, mean2]

        observations = np.hstack((vect1, vect2))

        if robust:
            # Degrees of freedom: shifted exponential so that ν >= 1.
            nu_minus_one = pm.Exponential('nu_minus_one', lam=1. / 29., testval=1)
            dof = pm.Deterministic('ν', nu_minus_one + 1)
            pm.MvStudentT('mult_norm', nu=dof, mu=means, cov=cov,
                          observed=observations)
        else:
            pm.MvNormal('mult_norm', mu=means, cov=cov,
                        observed=observations)

        trace = pm.sample()

    return trace
def __init__(self, data, sigma, mu_prior=[0.0, 1000.], sigma_prior=200.):
    """
    Build a multivariate Student-T model for data with per-point uncertainties.

    A latent Student-T variable of shape ``(npoints, ndim)`` acts as the
    prior for the true values; each data column is then observed through
    a Normal centred on the matching latent column with the supplied
    uncertainties.

    Parameters
    ----------
    data : array with a ``shape`` attribute, shape (npoints, ndim)
        Observed values.
    sigma : array, same shape as ``data``
        Uncertainties on ``data``; every entry must be strictly positive.
    mu_prior : sequence of two floats
        ``mu`` and ``sigma`` of the Normal hyperprior on the mean vector.
        The default list is only indexed, never mutated.
    sigma_prior : float
        Parameter of the HalfCauchy distribution used as ``sd_dist`` for
        the LKJ Cholesky covariance hyperprior.

    Raises
    ------
    ValueError
        If any uncertainty is non-positive, or if ``data`` is None.
    RuntimeError
        If ``data`` and ``sigma`` differ in shape.
    """
    self.fitted = False
    if np.any(sigma <= 0.):
        raise ValueError("Uncertainties must be positive real numbers!")
    self.plot_trace_vars = ['mu', "nu", "chol_corr"]

    if data is None:
        # This constructor has no x/y alternative, so say only what is needed.
        raise ValueError("data must be given as input")
    self.ndim = data.shape[1]
    self.npoints = data.shape[0]
    self.data = data

    if data.shape != sigma.shape:
        raise RuntimeError("data and sigma must have the same shape!")
    self.sigma = sigma

    self.model = pm.Model()
    with self.model:
        # Weakly informative hyperpriors on the means and standard deviations.
        mu = pm.Normal("mu", mu=mu_prior[0], sigma=mu_prior[1], shape=self.ndim)
        # Named sd_dist so it does not shadow the `sigma` argument.
        sd_dist = pm.HalfCauchy.dist(sigma_prior)
        # LKJ hyperprior (eta=2) on the covariance, which weakly penalises
        # strong correlations; it yields the Cholesky factor directly.
        chol, _corr, _stds = pm.LKJCholeskyCov("chol", n=self.ndim, eta=2.0,
                                               sd_dist=sd_dist, compute_corr=True)
        # Record the full covariance matrix deterministically for completeness.
        pm.Deterministic("cov", chol.dot(chol.T))
        # Degrees of freedom: shifted exponential so that nu >= 1.
        nu_minus_one = pm.Exponential('nu-1', lam=1. / 29.)
        nu = pm.Deterministic('nu', nu_minus_one + 1)
        # Latent (unobserved) values, one row per data point.
        prior = pm.MvStudentT('vals', nu=nu, mu=mu, chol=chol,
                              shape=(self.npoints, self.ndim))
        # Finally, tie each data column to its latent column through the
        # measurement uncertainties.
        for i in range(self.ndim):
            pm.Normal("data_" + str(i), mu=prior[:, i],
                      sigma=self.sigma[:, i], observed=self.data[:, i])
def __init__(self, n_to_sample=2000, *args, **kwargs):
    """
    Set up the five-dimensional multivariate Student-T model.

    Parameters
    ----------
    n_to_sample : int
        Stored on ``self.n_to_sample``.
    *args, **kwargs
        Forwarded unchanged to the parent class constructor.
    """
    super(MvStudentTBayesianSolver, self).__init__(*args, **kwargs)
    self.n_to_sample = n_to_sample
    self.model = pm.Model()

    n_dims = 5
    # Placeholder observations held in a shared variable. The product is
    # still an all-zero 5x5 array.
    placeholder = np.zeros((n_dims, n_dims)) * 0.5
    self.shared_data = theano.shared(placeholder, borrow=True)

    with self.model:
        sd_dist = pm.Gamma.dist(alpha=3.0, beta=1.0)
        packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=n_dims,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(n_dims, packed_chol, lower=True)
        pm.Deterministic('cov', theano.dot(chol, chol.T))

        # The mean vector and the observations share one Cholesky factor.
        self.mu_dist = pm.MvNormal("mu", mu=np.zeros(n_dims), chol=chol,
                                   shape=n_dims)
        pm.MvStudentT('obs', nu=3.5, mu=self.mu_dist, chol=chol,
                      observed=self.shared_data)

        self.step = pm.Metropolis()
def analyze_robust1(data):
    """
    Build a bivariate Student-T model for ``data`` and return it.

    The mean starts at the median of the data and the standard
    deviations start at ``mad(data, axis=0)``. Nothing is sampled here.

    Parameters
    ----------
    data : array-like
        Observations, passed as the observed values.

    Returns
    -------
    The constructed ``pm.Model``.
    """
    with pm.Model() as model:
        # priors might be adapted here to be less flat
        median_start = np.median(data.T, axis=1)
        mu = pm.Normal('mu', mu=0., sd=100., shape=2, testval=median_start)

        # Standard deviations: Normal bounded below at zero.
        PositiveNormal = pm.Bound(pm.Normal, lower=0.)
        mad_start = mad(data, axis=0)
        sigma = PositiveNormal('sigma', mu=0., sd=100., shape=2,
                               testval=mad_start)

        rho = pm.Uniform('r', lower=-1., upper=1., testval=0)
        cov = pm.Deterministic('cov', covariance(sigma, rho))

        # Degrees of freedom: Gamma bounded below at one.
        GammaAboveOne = pm.Bound(pm.Gamma, lower=1.)
        nu = GammaAboveOne('nu', alpha=2, beta=10)

        pm.MvStudentT('mult_t', nu=nu, mu=mu, Sigma=cov, observed=data)

    return model
def fit(self, x, y_obs, prior_trace=None, cores=4, prior_index=None):
    """
    Sample the posterior of the logistic-regression coefficients.

    Sets ``self.trace``, ``self.beta_hat`` (posterior mean of the
    coefficients), ``self.coef_`` and ``self.intercept_``.

    :param x: training features; must expose ``.index``, ``.shape`` and
        ``.drop``
    :param y_obs: training binary class labels 0/1
    :param prior_trace: default None; when given, its ``'betas'`` samples
        are summarised into the prior for this fit
    :param cores: n CPU cores to use for sampling, default 4, set to 1 if
        a runtime error occurs
    :param prior_index: index previously used to fit betas; those rows are
        dropped to avoid double weighting this data
    :return: trace, to be used as next prior
    """
    self.training_data_index = x.index
    print('shape before index drop: ' + str(x.shape))

    if prior_index is not None:
        # errors='ignore' because deleted data indexes are not in new indexes
        x = x.drop(prior_index, errors='ignore')
        y_obs = y_obs.drop(prior_index, errors='ignore')

    features = np.array(x)
    labels = np.array(y_obs)

    if self.fit_intercept:
        # Prepend a column of ones so the first coefficient is the intercept.
        intercept_column = np.array([1] * features.shape[0])
        features = np.column_stack((intercept_column, features))
    n_features = features.shape[1]

    try:
        with pm.Model() as model:
            if prior_trace is not None:
                # Estimate the new prior from the latest posterior sample.
                previous_betas = prior_trace['betas']
                # Degrees of freedom are assumed to be the number of
                # points in the sample.
                nu = previous_betas.shape[0]
                # Column means, i.e. one mean per coefficient beta_i.
                mu = previous_betas.mean(0)
                # NOTE(review): PyMC3 documents the MvStudentT variance as
                # nu / (nu - 2) * Sigma, so matching the sample covariance
                # would presumably need the inverse factor — confirm.
                cov = ((1. * nu) / (nu - 2)) * np.cov(previous_betas.T)
                betas = pm.MvStudentT('betas', mu=mu, cov=cov, nu=nu,
                                      shape=n_features)
            else:
                # No earlier posterior: start from a standard normal prior.
                mu = np.zeros(n_features)
                cov = np.identity(n_features)
                betas = pm.MvNormal('betas', mu=mu, cov=cov,
                                    shape=n_features)

            # Probability under the logistic regression model.
            # TODO: should it be -x @ beta?
            p = pm.math.invlogit(features @ betas)

            # Likelihood
            pm.Bernoulli('y', p, observed=labels)

            # Inference (use cores=1 if a runtime error occurs)
            self.trace = pm.sample(2000, cores=cores)

            self.beta_hat = self.trace['betas'].mean(0)
            if self.fit_intercept:
                self.coef_ = self.beta_hat[1:]
                self.intercept_ = self.beta_hat[0]
            else:
                self.coef_ = self.beta_hat
                self.intercept_ = None
    except RuntimeError as err:
        print('Runtime error: {0}'.format(err))

    # NOTE(review): after a swallowed RuntimeError this returns whatever
    # self.trace already held (or fails if it was never set) — confirm
    # that this is intended.
    return self.trace
def __init__(self, name, votes, polls, cholesky_matrix,
             after_polls_cholesky_matrix, election_day_cholesky_matrix,
             test_results, house_effects_model, min_polls_per_pollster,
             adjacent_day_fn):
    """
    Build the election-dynamics model: a random walk of party support
    anchored at the election-day votes, plus one multivariate Student-T
    likelihood per group of polls that span the same number of days.

    Parameters
    ----------
    name :
        Passed to the parent class constructor.
    votes :
        Election-day votes; added to the random walk to give the support.
    polls :
        Iterable collection of polls. It must expose ``num_parties``,
        ``num_days``, ``num_pollsters``, ``max_poll_days``,
        ``pollster_ids`` and ``get_last_days_average``; each poll must
        expose ``pollster_id``, ``start_day``, ``end_day``,
        ``num_poll_days``, ``num_polled`` and ``percentages``.
    cholesky_matrix :
        Cholesky factor used for the 'poll_innovations' and, scaled by
        1/sqrt(number of poll days), for the poll likelihoods.
    after_polls_cholesky_matrix :
        Cholesky factor for the 'after_poll_innovations'.
    election_day_cholesky_matrix :
        Cholesky factor for the 'election_day_innovations'.
    test_results :
        When None, ``self.test_results`` falls back to
        ``polls.get_last_days_average(10)``.
    house_effects_model :
        Passed to ``self.create_house_effects``.
    min_polls_per_pollster :
        Pollsters with fewer polls than this are left out of the model.
    adjacent_day_fn :
        An ``int``/``float`` exponent, a callable taking a day difference,
        or None.
    """
    super(ElectionDynamicsModel, self).__init__(name)
    self.votes = votes
    self.polls = polls
    self.num_parties = polls.num_parties
    self.num_days = polls.num_days
    self.num_pollsters = polls.num_pollsters
    self.max_poll_days = polls.max_poll_days
    self.cholesky_matrix = cholesky_matrix
    self.after_polls_cholesky_matrix = after_polls_cholesky_matrix
    self.election_day_cholesky_matrix = election_day_cholesky_matrix
    # A plain number is turned into a power-law weighting function. Only
    # exact int/float types match this check; anything else is stored as-is.
    if type(adjacent_day_fn) in [int, float]:
        self.adjacent_day_fn = lambda diff: (1. + diff)**adjacent_day_fn
    else:
        self.adjacent_day_fn = adjacent_day_fn
    self.test_results = (polls.get_last_days_average(10)
                         if test_results is None else test_results)
    # In some cases, we might want to filter pollsters without a minimum
    # number of polls. Because these pollsters produced only a few polls,
    # we cannot determine whether their results are biased or not.
    polls_per_pollster = {
        pollster_id:
        sum(1 for p in self.polls if p.pollster_id == pollster_id)
        for pollster_id in range(self.num_pollsters)
    }
    self.min_polls_per_pollster = min_polls_per_pollster
    self.num_pollsters_in_model = 0
    # Maps each pollster id to its index inside the model, or to None
    # when the pollster is filtered out.
    self.pollster_mapping = {}
    self.has_official = False
    for pollster_id, count in polls_per_pollster.items():
        if count >= self.min_polls_per_pollster:
            self.pollster_mapping[
                pollster_id] = self.num_pollsters_in_model
            self.num_pollsters_in_model += 1
        else:
            self.pollster_mapping[pollster_id] = None
    # The 'Official' pollster is always included, whatever its poll count,
    # and is given the next free index.
    # NOTE(review): if it already passed the threshold above, it is
    # assigned a second index here — confirm this is intended.
    if 'Official' in self.polls.pollster_ids:
        official_pollster = self.polls.pollster_ids.index('Official')
        self.pollster_mapping[
            official_pollster] = self.num_pollsters_in_model
        self.num_pollsters_in_model += 1
        self.has_official = True
    self.filtered_polls = [
        p for p in self.polls
        if self.pollster_mapping[p.pollster_id] is not None
    ]
    if len(self.polls) - len(self.filtered_polls) > 0:
        print(
            "Some polls were filtered out. Provided polls: %d, filtered: %d, final total: %d"
            % (len(self.polls), len(self.polls) - len(self.filtered_polls),
               len(self.filtered_polls)))
    else:
        print("Using all %d provided polls." % len(self.polls))
    print("Pollsters used: %s" % ', '.join(
        self.polls.pollster_ids[k]
        for k, v in self.pollster_mapping.items() if v is not None))
    # Smallest end_day among the polls that are kept.
    self.first_poll_day = min(p.end_day for p in self.filtered_polls)
    # The base polls model. House-effects models
    # are optionally set up based on this model.
    # The innovations are multivariate normal with the same
    # covariance/cholesky matrix as the polls' MvStudentT
    # variable. The assumption is that the parties' covariance
    # is invariant throughout the election campaign and
    # influences polls, evolving support and election day
    # vote.
    # One row of innovations for election day itself ...
    self.innovations = [
        pm.MvNormal('election_day_innovations',
                    mu=np.zeros([1, self.num_parties]),
                    chol=self.election_day_cholesky_matrix,
                    shape=[1, self.num_parties],
                    testval=np.zeros([1, self.num_parties]))
    ]
    # ... then the days between election day and the first poll day, if any ...
    if self.first_poll_day > 1:
        self.innovations += [
            pm.MvNormal('after_poll_innovations',
                        mu=np.zeros(
                            [self.first_poll_day - 1, self.num_parties]),
                        chol=self.after_polls_cholesky_matrix,
                        shape=[self.first_poll_day - 1, self.num_parties],
                        testval=np.zeros(
                            [self.first_poll_day - 1, self.num_parties]))
        ]
    # ... and finally the remaining days up to num_days.
    self.innovations += [
        pm.MvNormal('poll_innovations',
                    mu=np.zeros([
                        self.num_days - max(self.first_poll_day, 1),
                        self.num_parties
                    ]),
                    chol=self.cholesky_matrix,
                    shape=[
                        self.num_days - max(self.first_poll_day, 1),
                        self.num_parties
                    ],
                    testval=np.zeros([
                        self.num_days - max(self.first_poll_day, 1),
                        self.num_parties
                    ]))
    ]
    # The random walk itself is a cumulative sum of the innovations.
    self.walk = pm.Deterministic(
        'walk',
        tt.concatenate(self.innovations, axis=0).cumsum(axis=0))
    # The modeled support of the various parties over time is the sum
    # of both the election-day votes and the innovations that led up to it.
    # The support at day 0 is the election day vote.
    self.support = pm.Deterministic('support', self.votes + self.walk)
    # Group polls by number of days. This is necessary to allow generating
    # a different cholesky matrix for each. This corresponds to the
    # average of the modeled support used for multi-day polls.
    group_polls = lambda poll: poll.num_poll_days
    # Group the polls and create the likelihood variable.
    # The polls are sorted by the grouping key first so that groupby
    # sees each key as one consecutive run.
    self.grouped_polls = [
        (num_poll_days, [p for p in polls])
        for num_poll_days, polls in itertools.groupby(
            sorted(self.filtered_polls, key=group_polls), group_polls)
    ]

    # To handle multiple-day polls, we average the party support for the
    # relevant days
    def expected_poll_outcome(p):
        # Walk value for one poll: the mean over end_day..start_day for a
        # multi-day poll, otherwise the value at start_day.
        if p.num_poll_days > 1:
            poll_days = [d for d in range(p.end_day, p.start_day + 1)]
            return self.walk[poll_days].mean(axis=0)
        else:
            return self.walk[p.start_day]

    def expected_polls_outcome(polls):
        # Expected outcome for a whole group of polls.
        if self.adjacent_day_fn is None:
            return [expected_poll_outcome(p) for p in polls] + self.votes
        else:
            # One weight row per poll and one column per day: the sum of
            # adjacent_day_fn over the distances to each of the poll's
            # days. Days before first_poll_day get zero weight.
            weights = np.asarray([[
                sum(
                    self.adjacent_day_fn(abs(d - poll_day))
                    for poll_day in range(p.end_day, p.start_day + 1))
                if d >= self.first_poll_day else 0
                for d in range(self.num_days)
            ] for p in polls])
            # Each row is normalised to sum to one before weighting the support.
            return tt.dot(weights / weights.sum(axis=1, keepdims=True),
                          self.walk + self.votes)

    # Expected outcomes, keyed by the number of poll days of each group.
    self.mus = {
        num_poll_days: expected_polls_outcome(polls)
        for num_poll_days, polls in self.grouped_polls
    }
    self.create_house_effects(house_effects_model)
    self.likelihoods = [
        # The Multivariate Student-T variable that models the polls.
        #
        # The polls are modeled as a MvStudentT distribution which allows to
        # take into consideration the number of people polled as well as the
        # cholesky-covariance matrix that is central to the model.
        # Because we average the support over the number of poll days n, we
        # also need to appropriately factor the cholesky matrix. We assume
        # no correlation between different days, so the factor is 1/n for
        # the variance, and 1/sqrt(n) for the cholesky matrix.
        #
        # NOTE(review): testval is the raw `test_results` argument, not
        # self.test_results with its fallback — confirm this is intended.
        pm.MvStudentT('polls_%d_days' % num_poll_days,
                      nu=[p.num_polled - 1 for p in polls],
                      mu=self.mus[num_poll_days],
                      chol=self.cholesky_matrix / np.sqrt(num_poll_days),
                      testval=test_results,
                      shape=[len(polls), self.num_parties],
                      observed=[p.percentages for p in polls])
        for num_poll_days, polls in self.grouped_polls
    ]