Ejemplo n.º 1
0
def test_BaseRRegressor_without_p(generate_regression_data):
    """Check BaseRRegressor ATE and CATE quality when no propensity is passed.

    The test fits an XGBoost-backed R-learner on synthetic regression data
    and verifies that:

    * the ATE estimate lies inside its own reported bounds and its ``ape``
      error against the mean true effect is below ``ERROR_THRESHOLD``;
    * the cumulative gain obtained by ranking with the predicted CATE
      exceeds the cumulative gain of the ``Random`` baseline.

    Args:
        generate_regression_data: fixture returning a callable that yields
            ``(y, X, treatment, tau, b, e)``.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseRRegressor(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       return_ci=True,
                                       n_bootstraps=10)

    # The true effect is stored under 'tau', the very name handed to
    # get_cumgain as ``treatment_effect_col`` below.  It was previously
    # stored under the literal name 'treatment_effect_col', which never
    # matched that argument.
    auuc_metrics = pd.DataFrame({
        'cate_p': cate_p.flatten(),
        'W': treatment,
        'y': y,
        'tau': tau
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
Ejemplo n.º 2
0
    def __init__(
        self,
        learner=None,
        outcome_learner=None,
        effect_learner=None,
        random_state: StateType = None,
    ):
        """Build the wrapped ``BaseRRegressor``.

        When none of ``learner``, ``outcome_learner`` and ``effect_learner``
        is supplied, a ``LinearRegression`` is used as the shared learner.

        Args:
            learner: default learner for both outcome and effect
            outcome_learner: specific learner for outcome
            effect_learner: specific learner for effect
            random_state: RandomState or int to be used for K-fold splitting. NOT used
                in the learners, this has to be done by the user.
        """
        # Imported lazily, exactly where the wrapper is constructed.
        from causalml.inference.meta import BaseRRegressor

        nothing_specified = all(
            candidate is None
            for candidate in (learner, outcome_learner, effect_learner)
        )
        if nothing_specified:
            learner = LinearRegression()

        # The validated state is kept on the wrapper; the raw value is
        # forwarded to the underlying causalml model.
        self.random_state = check_random_state(random_state)
        self.model = BaseRRegressor(
            learner,
            outcome_learner,
            effect_learner,
            random_state=random_state,
        )
Ejemplo n.º 3
0
def test_BaseRRegressor_without_p(generate_regression_data):
    """Exercise BaseRRegressor on synthetic data without passing propensities.

    Verifies that the ATE estimate sits inside its reported bounds, that its
    ``ape`` error against the mean true effect is below ``ERROR_THRESHOLD``,
    and that the ``gini`` score of the bootstrapped CATE predictions against
    the true effect exceeds 0.5.

    Args:
        generate_regression_data: fixture returning a callable that yields
            ``(y, X, treatment, tau, b, e)``.
    """
    # Only the first four of the six generated arrays are used here.
    y, X, treatment, tau, _, _ = generate_regression_data()

    base_model = XGBRegressor()
    learner = BaseRRegressor(learner=base_model)

    # ATE: point estimate must lie within [lb, ub] and be close to the truth.
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert ate_p >= lb
    assert ate_p <= ub
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # CATE: fit with bootstrap confidence intervals, keep only the estimate.
    cate_p, _lower, _upper = learner.fit_predict(
        X=X,
        treatment=treatment,
        y=y,
        return_ci=True,
        n_bootstraps=10,
    )
    flat_cate = cate_p.flatten()
    assert gini(tau, flat_cate) > 0.5
Ejemplo n.º 4
0
def test_BaseRRegressor(generate_regression_data):
    """Check BaseRRegressor ATE and CATE quality when propensities are given.

    The test fits an XGBoost-backed R-learner on synthetic regression data,
    passing the generated propensity ``e`` as ``p``, and verifies that:

    * the ATE estimate lies inside its own reported bounds and its ``ape``
      error against the mean true effect is below ``ERROR_THRESHOLD``;
    * calling ``estimate_ate`` again with ``pretrain=True`` reproduces the
      same estimate and bounds;
    * the cumulative gain obtained by ranking with the predicted CATE
      exceeds the cumulative gain of the ``Random`` baseline.

    Args:
        generate_regression_data: fixture returning a callable that yields
            ``(y, X, treatment, tau, b, e)``.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseRRegressor(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check pre-train model
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(X=X,
                                                  treatment=treatment,
                                                  y=y,
                                                  p=e,
                                                  pretrain=True)
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       p=e,
                                       return_ci=True,
                                       n_bootstraps=10)

    # The true effect is stored under "tau", the very name handed to
    # get_cumgain as ``treatment_effect_col`` below.  It was previously
    # stored under the literal name "treatment_effect_col", which never
    # matched that argument.
    auuc_metrics = pd.DataFrame({
        "cate_p": cate_p.flatten(),
        "W": treatment,
        "y": y,
        "tau": tau,
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col="y",
                          treatment_col="W",
                          treatment_effect_col="tau")

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
Ejemplo n.º 5
0
    predictions['predictions_easy_treatment'] = predictions_easy_treatment
    predictions[
        'predictions_easy_treatment_test'] = predictions_easy_treatment_test
    predictions['predictions_randomized_trial'] = predictions_randomized_trial
    predictions[
        'predictions_randomized_trial_test'] = predictions_randomized_trial_test
    predictions['predictions_easy_propensity'] = predictions_easy_propensity
    predictions[
        'predictions_easy_propensity_test'] = predictions_easy_propensity_test

    return predictions


# R-learner variants, keyed by a short learner name.
# NOTE: a decision-tree variant
# ('learner_dtr': BaseRRegressor(learner=DecisionTreeRegressor()))
# is currently disabled.
estimators_R = dict(
    learner_xgb=BaseRRegressor(learner=XGBRegressor()),
    learner_lr=BaseRRegressor(learner=LinearRegression()),
)

# T-learner variants built on the same two base regressors.
estimators_T = dict(
    learner_xgb=BaseTRegressor(learner=XGBRegressor()),
    learner_lr=BaseTRegressor(learner=LinearRegression()),
)

# Kept at this position: the import runs after the estimator dictionaries
# are built and before the predictions are generated.
import stacking_helpers

# One prediction dictionary per learner family.
predictions_R = generate_predicitons_by_learner(estimators_R)
predictions_T = generate_predicitons_by_learner(estimators_T)

# Randomized-trial predictions of the R-learners (train and test variants).
pred_R = predictions_R["predictions_randomized_trial"]
pred_R_test = predictions_R["predictions_randomized_trial_test"]
Ejemplo n.º 6
0
class RLearner:
    """A wrapper of the BaseRRegressor from ``causalml``

    Defaults to ``LinearRegression`` as a base learner if no learner at all
    is specified. Allows to either specify one learner for both tasks or two
    distinct learners for the task outcome and effect learning.

    References:
        CausalML Framework `on Github <https://github.com/uber/causalml/>`_.

        [1] X. Nie and S. Wager,
            “Quasi-Oracle Estimation of Heterogeneous Treatment Effects.”
    """
    def __init__(
        self,
        learner=None,
        outcome_learner=None,
        effect_learner=None,
        random_state: StateType = None,
    ):
        """Setup an RLearner

        A ``LinearRegression`` default is only supplied when none of
        ``learner``, ``outcome_learner`` and ``effect_learner`` is given.

        Args:
            learner: default learner for both outcome and effect
            outcome_learner: specific learner for outcome
            effect_learner: specific learner for effect
            random_state: RandomState or int to be used for K-fold splitting. NOT used
                in the learners, this has to be done by the user.
        """
        from causalml.inference.meta import BaseRRegressor

        if learner is None and (outcome_learner is None
                                and effect_learner is None):
            learner = LinearRegression()

        # The validated state is kept on the wrapper; the raw value is
        # forwarded to the underlying causalml model.
        self.random_state = check_random_state(random_state)
        self.model = BaseRRegressor(learner,
                                    outcome_learner,
                                    effect_learner,
                                    random_state=random_state)

    def __str__(self):
        """Simple string representation for logs and outputs"""
        return "{}(outcome={}, effect={})".format(
            self.__class__.__name__,
            self.model.model_mu.__class__.__name__,
            self.model.model_tau.__class__.__name__,
        )

    def __repr__(self):
        """Same representation as ``__str__``"""
        return self.__str__()

    def fit(self,
            x: np.ndarray,
            t: np.ndarray,
            y: np.ndarray,
            p: Optional[np.ndarray] = None) -> None:
        """Fits the RLearner on given samples.

        Defaults to `justcause.learners.propensities.estimate_propensities`
        for ``p`` if not given explicitly, in order to allow a generic call
        to the fit() method

        Args:
            x: covariate matrix of shape (num_instances, num_features)
            t: treatment indicator vector, shape (num_instances)
            y: factual outcomes, (num_instances)
            p: propensities, shape (num_instances)

        """
        if p is None:
            # Propensity is needed by CausalML, so we estimate it,
            # if it was not provided
            p = estimate_propensities(x, t)

        # Pass everything by keyword: the positional order of ``p`` and
        # ``treatment`` in ``BaseRRegressor.fit`` depends on the installed
        # causalml version, and a positional call could silently swap them.
        self.model.fit(X=x, treatment=t, y=y, p=p)

    def predict_ite(self, x: np.ndarray, *args) -> np.ndarray:
        """Predicts ITE for given samples; ignores the factual outcome and treatment

        Args:
            x: covariates used for prediction
            *args: NOT USED but kept to work with the standard ``fit(x, t, y)`` call

        Returns:
            the flattened predictions of the wrapped model for ``x``

        """
        return self.model.predict(x).flatten()

    def estimate_ate(self,
                     x: np.ndarray,
                     t: np.ndarray,
                     y: np.ndarray,
                     p: Optional[np.ndarray] = None) -> float:
        """Estimate the average treatment effect (ATE) by fit and predict on given data

        Estimates the ATE as the mean of ITE predictions on the given data.

        Args:
            x: covariates of shape (num_samples, num_covariates)
            t: treatment indicator vector, shape (num_instances)
            y: factual outcomes, (num_instances)
            p: propensities, shape (num_instances)

        Returns:
            the average treatment effect estimate

        """
        self.fit(x, t, y, p)
        ite = self.predict_ite(x, t, y)
        return float(np.mean(ite))