def _initialize_kernel(self):
    """Create the variables and logp functions necessary to run the kernel.

    This method should not be overridden. If needed, use `setup_kernel` instead.
    """
    # Create a dictionary that stores the original variables' shape and size
    initial_point = self.model.recompute_initial_point(seed=self.rng.integers(2**30))
    for v in self.variables:
        self.var_info[v.name] = (initial_point[v.name].shape, initial_point[v.name].size)

    # Create the particles bijection map
    if self.start:
        init_rnd = self.start
    else:
        init_rnd = self.initialize_population()

    population = []
    for i in range(self.draws):
        point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
        population.append(DictToArrayBijection.map(point).data)

    self.tempered_posterior = np.array(floatX(population))

    # Initialize the prior and likelihood log probabilities
    shared = make_shared_replacements(initial_point, self.variables, self.model)

    self.prior_logp_func = _logp_forw(
        initial_point, [self.model.varlogpt], self.variables, shared
    )
    self.likelihood_logp_func = _logp_forw(
        initial_point, [self.model.datalogpt], self.variables, shared
    )

    priors = [self.prior_logp_func(sample) for sample in self.tempered_posterior]
    likelihoods = [self.likelihood_logp_func(sample) for sample in self.tempered_posterior]

    self.prior_logp = np.array(priors).squeeze()
    self.likelihood_logp = np.array(likelihoods).squeeze()
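
# A minimal, self-contained sketch (not PyMC code) of the dict-to-array mapping that
# `DictToArrayBijection.map(point).data` performs above: each draw's per-variable
# values are raveled and concatenated into one flat particle vector, and the flat
# particles are stacked into a (draws, n_flat) population matrix. The variable names
# ("mu", "sigma") and shapes below are invented for illustration only.
import numpy as np

draws = 4
init_rnd = {
    "mu": np.random.normal(size=(draws, 3)),
    "sigma": np.abs(np.random.normal(size=(draws,))),
}

population = []
for i in range(draws):
    point = {name: values[i] for name, values in init_rnd.items()}
    flat = np.concatenate([np.ravel(v) for v in point.values()])
    population.append(flat)

tempered_posterior = np.asarray(population)
print(tempered_posterior.shape)  # (4, 4): three "mu" entries plus one "sigma" per draw
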
def __init__(
    self,
    vars=None,
    batch_size=None,
    total_size=None,
    step_size=1.0,
    model=None,
    random_seed=None,
    minibatches=None,
    minibatch_tensors=None,
    **kwargs
):
    warnings.warn(EXPERIMENTAL_WARNING)

    model = modelcontext(model)

    if vars is None:
        vars = model.value_vars
    else:
        vars = [model.rvs_to_values.get(var, var) for var in vars]

    vars = inputvars(vars)

    self.model = model
    self.vars = vars
    self.batch_size = batch_size
    self.total_size = total_size
    _value_error(
        total_size is not None and batch_size is not None,
        "total_size and batch_size of training data have to be specified",
    )
    self.expected_iter = int(total_size / batch_size)

    # Set the random stream
    if random_seed is None:
        self.random = at_rng()
    else:
        self.random = at_rng(random_seed)

    self.step_size = step_size
    shared = make_shared_replacements(vars, model)
    self.updates = OrderedDict()
    # XXX: This needs to be refactored
    self.q_size = None  # int(sum(v.dsize for v in self.vars))

    # This seems to be the only place that `Model.flatten` is used.
    # TODO: Why not _actually_ flatten the variables?
    # E.g. `flat_vars = at.concatenate([var.ravel() for var in vars])`
    # or `set_subtensor` the `vars` into an `at.vector`?
    flat_view = model.flatten(vars)

    self.inarray = [flat_view.input]

    self.dlog_prior = prior_dlogp(vars, model, flat_view)
    self.dlogp_elemwise = elemwise_dlogL(vars, model, flat_view)

    if minibatch_tensors is not None:
        _check_minibatches(minibatch_tensors, minibatches)
        self.minibatches = minibatches

        # Replace input shared variables with tensors
        def is_shared(t):
            return isinstance(t, aesara.compile.sharedvalue.SharedVariable)

        tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
        updates = OrderedDict(
            {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
        )
        self.minibatch_tensors = tensors
        self.inarray += self.minibatch_tensors
        self.updates.update(updates)

    self._initialize_values()
    super().__init__(vars, shared)
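
# A standalone sketch (assuming aesara is installed; the data and names X_shared and
# y_plain are invented) of the shared-to-tensor swap used above when `minibatch_tensors`
# is given: shared-variable inputs are replaced by fresh placeholder tensors of the same
# type, and a mapping from each shared variable to its placeholder is recorded so a new
# minibatch can be fed in at every step.
from collections import OrderedDict

import aesara
import aesara.tensor as at
import numpy as np

X_shared = aesara.shared(np.zeros((10, 3)), name="X")  # would normally hold a minibatch
y_plain = at.vector("y")                               # already a plain symbolic tensor

def is_shared(t):
    return isinstance(t, aesara.compile.sharedvalue.SharedVariable)

minibatch_tensors = [X_shared, y_plain]
tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
updates = OrderedDict({t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)})

print([type(t).__name__ for t in tensors])  # a placeholder replaces the shared input
print(list(updates.keys()))                 # only the shared variable gets an entry
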
def __init__(self, vars=None, num_particles=40, max_stages=100, batch="auto", model=None):
    _log.warning("BART is experimental. Use with caution.")
    model = modelcontext(model)
    initial_values = model.recompute_initial_point()
    value_bart = inputvars(vars)[0]
    self.bart = model.values_to_rvs[value_bart].owner.op

    self.X = self.bart.X
    self.Y = self.bart.Y
    self.missing_data = np.any(np.isnan(self.X))
    self.m = self.bart.m
    self.alpha = self.bart.alpha
    self.k = self.bart.k
    self.alpha_vec = self.bart.split_prior
    if self.alpha_vec is None:
        self.alpha_vec = np.ones(self.X.shape[1])

    self.init_mean = self.Y.mean()
    # If the response is binary, use a fixed scale for the leaf values;
    # otherwise scale by the spread of Y (count data may need its own case).
    Y_unique = np.unique(self.Y)
    if Y_unique.size == 2 and np.all(Y_unique == [0, 1]):
        self.mu_std = 6 / (self.k * self.m**0.5)
    else:
        self.mu_std = (2 * self.Y.std()) / (self.k * self.m**0.5)

    self.num_observations = self.X.shape[0]
    self.num_variates = self.X.shape[1]
    self.available_predictors = list(range(self.num_variates))

    self.sum_trees = np.full_like(self.Y, self.init_mean).astype(aesara.config.floatX)
    self.a_tree = Tree.init_tree(
        leaf_node_value=self.init_mean / self.m,
        idx_data_points=np.arange(self.num_observations, dtype="int32"),
    )
    self.mean = fast_mean()
    self.normal = NormalSampler()
    self.prior_prob_leaf_node = compute_prior_probability(self.alpha)
    self.ssv = SampleSplittingVariable(self.alpha_vec)

    self.tune = True

    if batch == "auto":
        batch = max(1, int(self.m * 0.1))
        self.batch = (batch, batch)
    elif isinstance(batch, (tuple, list)):
        self.batch = batch
    else:
        self.batch = (batch, batch)

    self.log_num_particles = np.log(num_particles)
    self.indices = list(range(2, num_particles))
    self.len_indices = len(self.indices)
    self.max_stages = max_stages

    shared = make_shared_replacements(initial_values, vars, model)
    self.likelihood_logp = logp(initial_values, [model.datalogpt], vars, shared)

    self.all_particles = []
    for _ in range(self.m):
        self.a_tree.leaf_node_value = self.init_mean / self.m
        p = ParticleTree(self.a_tree)
        self.all_particles.append(p)
    self.all_trees = np.array([p.tree for p in self.all_particles])
    super().__init__(vars, shared)
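
# A standalone sketch (not part of the sampler; `resolve_batch` and all values are
# invented for illustration) of two small pieces above: the "auto" batch heuristic,
# which proposes roughly 10% of the m trees per step during both tuning and sampling,
# and the leaf-value scale mu_std, which shrinks with k and with the number of trees m.
import numpy as np

def resolve_batch(batch, m):
    # "auto" -> 10% of the trees (at least 1); otherwise accept an int or a
    # (tune, sample) pair, always returning a two-element tuple.
    if batch == "auto":
        b = max(1, int(m * 0.1))
        return (b, b)
    if isinstance(batch, (tuple, list)):
        return tuple(batch)
    return (batch, batch)

m, k = 50, 2.0
Y = np.random.normal(size=100)

print(resolve_batch("auto", m))   # (5, 5)
print(resolve_batch((10, 2), m))  # (10, 2)

mu_std_binary = 6 / (k * m**0.5)             # fixed scale for a 0/1 response
mu_std_continuous = (2 * Y.std()) / (k * m**0.5)  # scale tied to the spread of Y
print(mu_std_binary, mu_std_continuous)
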