Example #1
    def _initialize_kernel(self):
        """Create variables and logp function necessary to run kernel

        This method should not be overwritten. If needed, use `setup_kernel`
        instead.

        """
        # Create dictionary that stores original variables shape and size
        initial_point = self.model.recompute_initial_point(
            seed=self.rng.integers(2**30))
        for v in self.variables:
            self.var_info[v.name] = (initial_point[v.name].shape,
                                     initial_point[v.name].size)

        # Create particles bijection map
        if self.start:
            init_rnd = self.start
        else:
            init_rnd = self.initialize_population()

        population = []
        for i in range(self.draws):
            point = Point(
                {v.name: init_rnd[v.name][i]
                 for v in self.variables},
                model=self.model)
            population.append(DictToArrayBijection.map(point).data)
        self.tempered_posterior = np.array(floatX(population))

        # Initialize prior and likelihood log probabilities
        shared = make_shared_replacements(initial_point, self.variables,
                                          self.model)

        self.prior_logp_func = _logp_forw(initial_point, [self.model.varlogpt],
                                          self.variables, shared)
        self.likelihood_logp_func = _logp_forw(initial_point,
                                               [self.model.datalogpt],
                                               self.variables, shared)

        priors = [
            self.prior_logp_func(sample) for sample in self.tempered_posterior
        ]
        likelihoods = [
            self.likelihood_logp_func(sample)
            for sample in self.tempered_posterior
        ]

        self.prior_logp = np.array(priors).squeeze()
        self.likelihood_logp = np.array(likelihoods).squeeze()
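In the loop above, each draw is turned into a `Point` (a dict of variable values) and then mapped to a single flat array, so the whole population can be stored as one 2-D array and fed to the compiled logp functions row by row. Below is a minimal, standalone sketch of that flattening idea in plain NumPy; the variable names (`mu`, `sigma`) and the `flatten_point` helper are made up for illustration and are not part of the class.

import numpy as np

# Illustrative only: one particle is a dict of named values; flattening it in a
# fixed variable order gives one row of the population array.
def flatten_point(point, var_order):
    return np.concatenate([np.ravel(point[name]) for name in var_order])

var_order = ["mu", "sigma"]  # assumed variable names for this sketch
draws = [
    {"mu": np.array([0.1, -0.2]), "sigma": np.array(1.0)},
    {"mu": np.array([0.3, 0.0]), "sigma": np.array(0.8)},
]
population = np.array([flatten_point(d, var_order) for d in draws])
print(population.shape)  # (2, 3): one row per particle, one column per flat value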
Example #2
    def __init__(
        self,
        vars=None,
        batch_size=None,
        total_size=None,
        step_size=1.0,
        model=None,
        random_seed=None,
        minibatches=None,
        minibatch_tensors=None,
        **kwargs
    ):
        warnings.warn(EXPERIMENTAL_WARNING)

        model = modelcontext(model)

        if vars is None:
            vars = model.value_vars
        else:
            vars = [model.rvs_to_values.get(var, var) for var in vars]

        vars = inputvars(vars)

        self.model = model
        self.vars = vars
        self.batch_size = batch_size
        self.total_size = total_size
        _value_error(
            total_size is not None and batch_size is not None,
            "total_size and batch_size of training data have to be specified",
        )
        self.expected_iter = int(total_size / batch_size)

        # set random stream
        if random_seed is None:
            self.random = at_rng()
        else:
            self.random = at_rng(random_seed)

        self.step_size = step_size

        shared = make_shared_replacements(vars, model)

        self.updates = OrderedDict()
        # XXX: This needs to be refactored
        self.q_size = None  # int(sum(v.dsize for v in self.vars))

        # This seems to be the only place that `Model.flatten` is used.
        # TODO: Why not _actually_ flatten the variables?
        # E.g. `flat_vars = at.concatenate([var.ravel() for var in vars])`
        # or `set_subtensor` the `vars` into a `at.vector`?

        flat_view = model.flatten(vars)
        self.inarray = [flat_view.input]

        self.dlog_prior = prior_dlogp(vars, model, flat_view)
        self.dlogp_elemwise = elemwise_dlogL(vars, model, flat_view)

        if minibatch_tensors is not None:
            _check_minibatches(minibatch_tensors, minibatches)
            self.minibatches = minibatches

            # Replace input shared variables with tensors
            def is_shared(t):
                return isinstance(t, aesara.compile.sharedvalue.SharedVariable)

            tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
            updates = OrderedDict(
                {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
            )
            self.minibatch_tensors = tensors
            self.inarray += self.minibatch_tensors
            self.updates.update(updates)

        self._initialize_values()
        super().__init__(vars, shared)
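The minibatch branch above replaces each shared input with a fresh symbolic variable of the same type and records the mapping, so data can be supplied at call time instead of being read from the shared container. A minimal sketch of that swap pattern, assuming Aesara is available; the variable names here are illustrative only.

import numpy as np
import aesara
import aesara.tensor as at
from aesara.compile.sharedvalue import SharedVariable

# A shared container (as a minibatch source might provide) and an ordinary tensor.
x_shared = aesara.shared(np.zeros((4, 3)), name="x_minibatch")
y_plain = at.vector("y")

minibatch_tensors = [x_shared, y_plain]

# Swap every shared variable for a fresh symbolic input of the same type and
# keep a mapping from the original to its stand-in.
tensors = [(t.type() if isinstance(t, SharedVariable) else t) for t in minibatch_tensors]
replacements = {
    t: t_new for t, t_new in zip(minibatch_tensors, tensors) if isinstance(t, SharedVariable)
}

print(list(replacements))  # only the shared variable was replaced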
Example #3
    def __init__(self, vars=None, num_particles=40, max_stages=100, batch="auto", model=None):
        _log.warning("BART is experimental. Use with caution.")
        model = modelcontext(model)
        initial_values = model.recompute_initial_point()
        value_bart = inputvars(vars)[0]
        self.bart = model.values_to_rvs[value_bart].owner.op

        self.X = self.bart.X
        self.Y = self.bart.Y
        self.missing_data = np.any(np.isnan(self.X))
        self.m = self.bart.m
        self.alpha = self.bart.alpha
        self.k = self.bart.k
        self.alpha_vec = self.bart.split_prior
        if self.alpha_vec is None:
            self.alpha_vec = np.ones(self.X.shape[1])

        self.init_mean = self.Y.mean()
        Y_unique = np.unique(self.Y)
        if Y_unique.size == 2 and np.all(Y_unique == [0, 1]):
            # binary data
            self.mu_std = 6 / (self.k * self.m ** 0.5)
        else:
            # continuous data; count data may need to be handled separately
            self.mu_std = (2 * self.Y.std()) / (self.k * self.m ** 0.5)

        self.num_observations = self.X.shape[0]
        self.num_variates = self.X.shape[1]
        self.available_predictors = list(range(self.num_variates))

        self.sum_trees = np.full_like(self.Y, self.init_mean).astype(aesara.config.floatX)
        self.a_tree = Tree.init_tree(
            leaf_node_value=self.init_mean / self.m,
            idx_data_points=np.arange(self.num_observations, dtype="int32"),
        )
        self.mean = fast_mean()

        self.normal = NormalSampler()
        self.prior_prob_leaf_node = compute_prior_probability(self.alpha)
        self.ssv = SampleSplittingVariable(self.alpha_vec)

        self.tune = True

        if batch == "auto":
            batch = max(1, int(self.m * 0.1))
            self.batch = (batch, batch)
        else:
            if isinstance(batch, (tuple, list)):
                self.batch = batch
            else:
                self.batch = (batch, batch)

        self.log_num_particles = np.log(num_particles)
        self.indices = list(range(2, num_particles))
        self.len_indices = len(self.indices)
        self.max_stages = max_stages

        shared = make_shared_replacements(initial_values, vars, model)
        self.likelihood_logp = logp(initial_values, [model.datalogpt], vars, shared)
        self.all_particles = []
        for _ in range(self.m):
            self.a_tree.leaf_node_value = self.init_mean / self.m
            self.all_particles.append(ParticleTree(self.a_tree))
        self.all_trees = np.array([p.tree for p in self.all_particles])
        super().__init__(vars, shared)
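The `batch` argument above is normalized so that "auto" becomes roughly 10% of the `m` trees (never fewer than one), and any scalar or pair always ends up stored as a two-element tuple. A standalone sketch of just that rule; the `normalize_batch` helper is hypothetical and only mirrors the branch above.

def normalize_batch(batch, m):
    """Hypothetical helper mirroring the batch-normalization rule; not part of the sampler."""
    if batch == "auto":
        size = max(1, int(m * 0.1))
        return (size, size)
    if isinstance(batch, (tuple, list)):
        return batch
    return (batch, batch)

print(normalize_batch("auto", 50))   # (5, 5)
print(normalize_batch(3, 50))        # (3, 3)
print(normalize_batch((2, 8), 50))   # (2, 8)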