Example #1
0
def test_enable_ipw_without_known_propensity_parallel_runner():
    """IPW enabled without a supplied 'Propensity' column, using ParallelRunner.

    CausalLift must estimate propensity scores internally; the run should
    produce train/test DataFrames and an estimated-effect DataFrame.
    """
    seed = 0

    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )

    # random_state=seed makes the split reproducible, consistent with the
    # other tests in this file (the original omitted it here only).
    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )

    cl = CausalLift(
        train_df, test_df, enable_ipw=True, verbose=3, runner="ParallelRunner"
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
def test_enable_ipw_with_known_propensity():
    """IPW enabled with user-supplied 'Propensity' columns.

    Random scores are attached to both splits so CausalLift uses the provided
    propensities instead of estimating them. The original guarded this with an
    always-True ``test_random_propensity`` flag; the dead conditional and the
    mid-function import are removed without changing behavior.
    """
    import random

    seed = 0

    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5,
             10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )

    train_df, test_df = train_test_split(df,
                                         test_size=0.2,
                                         random_state=seed,
                                         stratify=df["Treatment"])

    # NOTE(review): random.random() is unseeded here, so the propensity
    # values differ between runs — only the isinstance checks below rely
    # on them, so the test remains deterministic in outcome.
    train_df = train_df.copy()
    train_df.loc[:, "Propensity"] = [
        random.random() for _ in range(train_df.shape[0])
    ]

    test_df = test_df.copy()
    test_df.loc[:, "Propensity"] = [
        random.random() for _ in range(test_df.shape[0])
    ]

    cl = CausalLift(train_df, test_df, enable_ipw=True, verbose=3)
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
def generate_data_(params):
    """Generate simulated uplift data from a parameter dict.

    Thin wrapper around ``generate_data`` that unpacks ``params`` as keyword
    arguments. Notes on the underlying simulator:

    - "Sleeping dogs" (a.k.a. "do-not-disturb": people who "buy" if untreated
      but not if treated) can be simulated with negative values in ``tau``.
    - Observational data with confounding can be simulated via non-zero
      values in ``propensity_coef``.
    - An A/B test (RCT) with a 50:50 split corresponds to all-zero
      ``propensity_coef`` (the default).
    - The first element of each list parameter is the intercept.
    """
    return generate_data(**params)
# Script example: run CausalLift end-to-end on simulated data without IPW.
# Imports are grouped at the top (the original interleaved them with code)
# and the redundant "\" continuation inside the parenthesized call is removed.
from sklearn.model_selection import train_test_split

from causallift import CausalLift
from causallift import generate_data

seed = 0

df = generate_data(
    N=1000,
    n_features=3,
    beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
    error_std=0.1,
    tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
    tau_std=0.1,
    discrete_outcome=True,
    seed=seed,
    feature_effect=0,  # Effect of beta on treated outcome
    propensity_coef=[0, -1, 1, -1],  # Effect of [intercept and features] on propensity log-odds for treatment
    index_name='index')

train_df, test_df = train_test_split(df,
                                     test_size=0.2,
                                     random_state=seed,
                                     stratify=df['Treatment'])

# enable_ipw=False: treatment assignment is treated as unconfounded (RCT-like).
cl = CausalLift(train_df, test_df, enable_ipw=False, random_state=0, verbose=3)
train_df, test_df = cl.estimate_cate_by_2_models()
estimated_effect_df = cl.estimate_recommendation_impact()
Example #5
0
def test_skopt():
    """Hyperparameter search for the uplift model via skopt's BayesSearchCV.

    Covers two equivalent configurations:
    1. ``uplift_model_params`` as a dict naming ``skopt.BayesSearchCV``.
    2. ``uplift_model_params`` as a pre-built ``BayesSearchCV`` instance.

    The original duplicated the dataset generation and split verbatim for
    each scenario (and reassigned ``seed`` mid-function); that is factored
    into a local helper with identical arguments.
    """
    seed = 0

    def _make_split():
        # Identical simulated dataset + stratified split for both scenarios.
        df = generate_data(
            N=1000,
            n_features=3,
            beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
            error_std=0.1,
            tau=[1, -5, -5,
                 10],  # Effect of [intercept and features] on treated outcome
            tau_std=0.1,
            discrete_outcome=True,
            seed=seed,
            feature_effect=0,  # Effect of beta on treated outcome
            propensity_coef=[
                0,
                -1,
                1,
                -1,
            ],  # Effect of [intercept and features] on propensity log-odds for treatment
            index_name="index",
        )
        return train_test_split(df,
                                test_size=0.2,
                                random_state=seed,
                                stratify=df["Treatment"])

    # Scenario 1: search configured declaratively via a parameter dict.
    train_df, test_df = _make_split()

    uplift_model_params = dict(
        search_cv="skopt.BayesSearchCV",
        estimator="xgboost.XGBClassifier",
        const_params=dict(
            booster="gbtree",
            silent=True,
            objective="binary:logistic",
            base_score=0.5,
            eval_metric="auc",
            n_jobs=-1,
            seed=seed,
        ),
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )

    cl = CausalLift(
        train_df,
        test_df,
        enable_ipw=True,
        verbose=3,
        uplift_model_params=uplift_model_params,
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)

    # Scenario 2: pass an already-constructed BayesSearchCV model object.
    train_df, test_df = _make_split()

    estimator = XGBClassifier(
        booster="gbtree",
        silent=True,
        objective="binary:logistic",
        base_score=0.5,
        eval_metric="auc",
        n_jobs=-1,
        seed=seed,
    )

    model = BayesSearchCV(
        estimator=estimator,
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )

    cl_ext = CausalLift(train_df,
                        test_df,
                        enable_ipw=True,
                        verbose=3,
                        uplift_model_params=model)
    train_df_ext, test_df_ext = cl_ext.estimate_cate_by_2_models()
    estimated_effect_df_ext = cl_ext.estimate_recommendation_impact()
    assert isinstance(train_df_ext, pd.DataFrame)
    assert isinstance(test_df_ext, pd.DataFrame)
    assert isinstance(estimated_effect_df_ext, pd.DataFrame)