Example #1
0
class TestSplitModel(unittest.TestCase):
    """Tests tree initialisation and target updating on a two-tree ``Model``."""

    def setUp(self):
        """Build a five-observation data set and a model with initialised trees."""
        covariates = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
        self.X = format_covariate_matrix(covariates)
        self.raw_y = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        # The already formatted matrix is run through format_covariate_matrix
        # once more when the Data object is built.
        self.data = Data(
            format_covariate_matrix(self.X),
            self.raw_y,
            normalize=True,
        )
        # Sigma is scaled by the normalising scale of the target held in Data.
        sigma = Sigma(
            0.001,
            0.001,
            scaling_factor=self.data.y.normalizing_scale,
        )
        self.model = Model(self.data, sigma, n_trees=2, initializer=None)
        self.model.initialize_trees()

    def test_tree_updating(self):
        """Updating a tree's target must be visible on that tree's first node."""
        replacement_y = np.ones(5)
        self.model.trees[0].update_y(replacement_y)
        stored_values = self.model.trees[0].nodes[0].data.y.values
        self.assertListEqual(list(stored_values), list(replacement_y))

    def test_trees_initialized_correctly(self):
        """The model holds two trees, each consisting of exactly one node."""
        self.assertEqual(len(self.model.trees), 2)
        for single_tree in self.model.trees:
            node_count = len(single_tree.nodes)
            self.assertEqual(node_count, 1)
Example #2
0
 def setUp(self):
     """Build a five-observation data set and a model with initialised trees."""
     covariates = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
     self.X = format_covariate_matrix(covariates)
     self.raw_y = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
     # The already formatted matrix is run through format_covariate_matrix
     # once more when the Data object is built.
     self.data = Data(
         format_covariate_matrix(self.X),
         self.raw_y,
         normalize=True,
     )
     # Sigma is scaled by the normalising scale of the target held in Data.
     sigma = Sigma(
         0.001,
         0.001,
         scaling_factor=self.data.y.normalizing_scale,
     )
     self.model = Model(self.data, sigma, n_trees=2, initializer=None)
     self.model.initialize_trees()
Example #3
0
 def sample(self, model: Model, sigma: Sigma) -> float:
     """
     Draw a value for sigma from a gamma distribution built from the residuals

     Parameters
     ----------
     model: Model
         Supplies ``data.n_obsv`` and ``residuals()``
     sigma: Sigma
         Supplies the ``alpha`` and ``beta`` values that are updated

     Returns
     -------
     float
         A gamma draw raised to the power -0.5
     """
     # Shape: alpha plus half the number of observations.
     shape = sigma.alpha + (model.data.n_obsv / 2.)
     # Rate: beta plus half the sum of squared residuals.
     residual_sum_of_squares = np.sum(np.square(model.residuals()))
     rate = sigma.beta + (0.5 * residual_sum_of_squares)
     # numpy parameterises the gamma by scale, i.e. the reciprocal of the rate.
     gamma_draw = np.random.gamma(shape, 1. / rate)
     return np.power(gamma_draw, -0.5)
Example #4
0
 def _construct_model(self, X: np.ndarray, y: np.ndarray) -> Model:
     if len(X) == 0 or X.shape[1] == 0:
         raise ValueError("Empty covariate matrix passed")
     self.data = self._convert_covariates_to_data(X, y)
     self.sigma = Sigma(self.sigma_a, self.sigma_b,
                        self.data.normalizing_scale)
     self.model = Model(self.data,
                        self.sigma,
                        n_trees=self.n_trees,
                        alpha=self.alpha,
                        beta=self.beta)
     return self.model
Example #5
0
    def steps(self, model: Model) -> Generator[Callable[[Model], Sampler], None, None]:
        """
        Create a generator of the steps that need to be called to complete a full Gibbs sample

        For every tree returned by ``model.refreshed_trees()`` one tree step is
        yielded, followed by one leaf step per node in ``tree.leaf_nodes``.
        A single sigma step is yielded last.

        Each yielded callable is bound to its own tree / node at the moment it
        is created, so the callables stay correct even if the caller collects
        several of them before invoking any.  The generator itself is lazy:
        ``tree.leaf_nodes`` is only read once the generator is resumed after
        the corresponding tree step has been yielded.

        NOTE(review): the return annotation advertises ``Callable[[Model], Sampler]``,
        but every yielded callable is meant to be invoked without arguments.

        Parameters
        ----------
        model: Model
            The model being sampled

        Returns
        -------
        Generator[Callable[[Model], Sampler], None, None]
            A generator of functions to be called, in the order they are yielded
        """
        for tree in model.refreshed_trees():
            # Bind the loop variable as a default argument; a bare closure would
            # look `tree` up at call time and see whatever the loop reached last.
            yield lambda tree=tree: self.tree_sampler.step(model, tree)
            for node in tree.leaf_nodes:
                # Same early binding for the leaf node.
                yield lambda node=node: self.leaf_sampler.step(model, node)
        # model.sigma is deliberately looked up when the step is executed.
        yield lambda: self.sigma_sampler.step(model, model.sigma)
Example #6
0
    def samples(self,
                model: Model,
                n_samples: int,
                n_burn: int,
                thin: float = 0.1,
                store_in_sample_predictions: bool = True,
                store_acceptance: bool = True) -> Chain:
        """
        Run the burn-in and sampling loops and collect the thinned traces

        Parameters
        ----------
        model: Model
            The model that is stepped and logged
        n_samples: int
            Number of steps taken in the sampling loop
        n_burn: int
            Number of steps taken in the burn-in loop; nothing is stored for these
        thin: float
            A sampling step ``ss`` is stored when ``ss % (1. / thin) == 0``
        store_in_sample_predictions: bool
            Whether the logged ``model.predict()`` output is stored
        store_acceptance: bool
            Whether the value returned by ``self.step`` is stored

        Returns
        -------
        Chain
            A dict with the keys "model", "acceptance" and "in_sample_predictions"
        """
        print("Starting burn")

        logger = self.trace_logger_class()

        for _ in tqdm(range(n_burn)):
            self.step(model, logger)

        chain = {
            "model": [],
            "acceptance": [],
            "in_sample_predictions": [],
        }
        print("Starting sampling")

        # The modulus below is taken against this float value.
        keep_every = 1. / thin

        for draw_index in tqdm(range(n_samples)):
            step_result = self.step(model, logger)
            if draw_index % keep_every != 0:
                continue

            if store_in_sample_predictions:
                prediction_entry = logger["In Sample Prediction"](model.predict())
                # Entries the logger reports as None are left out.
                if prediction_entry is not None:
                    chain["in_sample_predictions"].append(prediction_entry)

            if store_acceptance:
                chain["acceptance"].append(step_result)

            model_entry = logger["Model"](model)
            if model_entry is not None:
                chain["model"].append(model_entry)

        return chain
Example #7
0
    def samples(
        self,
        model: Model,
        n_samples: int,
        n_burn: int,
        thin: float = 0.1,
        store_in_sample_predictions: bool = True
    ) -> Tuple[List[Model], np.ndarray]:
        """
        Run the burn-in and sampling loops and collect thinned model copies

        Parameters
        ----------
        model: Model
            The model that is stepped, copied and used for predictions
        n_samples: int
            Number of steps taken in the sampling loop
        n_burn: int
            Number of steps taken in the burn-in loop; nothing is stored for these
        thin: float
            A sampling step ``ss`` is stored when ``ss % (1. / thin) == 0``
        store_in_sample_predictions: bool
            Whether ``model.predict()`` is stored for every kept step

        Returns
        -------
        Tuple[List[Model], np.ndarray]
            The kept model copies and the kept predictions as an array
        """
        print("Starting burn")
        for _ in tqdm(range(n_burn)):
            self.step(model)

        kept_predictions, kept_models = [], []
        print("Starting sampling")

        # The modulus below is taken against this float value.
        keep_every = 1. / thin

        for draw_index in tqdm(range(n_samples)):
            self.step(model)
            if draw_index % keep_every != 0:
                continue
            if store_in_sample_predictions:
                kept_predictions.append(model.predict())
            # A copy is stored because the same model object keeps being stepped.
            kept_models.append(deep_copy_model(model))

        return kept_models, np.array(kept_predictions)
Example #8
0
 def sample(model: Model, sigma: Sigma) -> float:
     """
     Draw a value for sigma from an inverse gamma distribution built from the residuals

     Parameters
     ----------
     model: Model
         Supplies ``data.n_obsv`` and ``residuals()``
     sigma: Sigma
         Supplies the ``alpha`` and ``beta`` values that are updated

     Returns
     -------
     float
         An inverse gamma draw raised to the power 0.5
     """
     # Shape: alpha plus half the number of observations.
     shape = sigma.alpha + (model.data.n_obsv / 2.)
     # Scale: beta plus half the sum of squared residuals.
     residual_sum_of_squares = np.sum(np.square(model.residuals()))
     scale = sigma.beta + (0.5 * residual_sum_of_squares)
     inverse_gamma_draw = invgamma.rvs(shape, scale=scale)
     return np.power(inverse_gamma_draw, 0.5)
Example #9
0
 def _construct_model(self, X: Union[np.ndarray, pd.DataFrame], y: np.ndarray) -> Model:
     """
     Build the data, sigma and model objects and store them on the instance

     Parameters
     ----------
     X: Union[np.ndarray, pd.DataFrame]
         Covariates, passed on to ``_convert_covariates_to_data``
     y: np.ndarray
         Target values, passed on to ``_convert_covariates_to_data``

     Returns
     -------
     Model
         The newly built model, also stored as ``self.model``
     """
     self.data = self._convert_covariates_to_data(X, y)
     # Sigma receives the normalising scale of the freshly converted data.
     self.sigma = Sigma(
         self.sigma_a,
         self.sigma_b,
         self.data.normalizing_scale,
     )
     built_model = Model(
         self.data,
         self.sigma,
         n_trees=self.n_trees,
         alpha=self.alpha,
         beta=self.beta,
     )
     self.model = built_model
     return self.model