def test_nonparam_dml(self):
    """Check that a NonParamDML estimator can be refit after its settings change.

    The scenario, in order:

    1. After a plain ``fit`` (no ``cache_values``), ``refit_final`` must raise.
    2. After fitting with ``cache_values=True``, swapping ``model_final`` and
       then ``featurizer`` followed by ``refit_final`` must be reflected in
       ``model_cate`` and ``featurizer_`` respectively.
    3. After reconfiguring the estimator for a discrete treatment and
       calling ``fit`` again, its effects and the first element of its
       effect interval must equal those of a freshly constructed estimator
       with the same settings and the same ``random_state``.
    """
    y, T, X, W = self._get_data()
    query = X[:1]  # the single-row slice used for every effect query below

    est = NonParamDML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        model_final=WeightedLasso(),
        random_state=123,
    )

    # Without cached values there is nothing to refit from, so this must fail.
    est.fit(y, T, X=X, W=W)
    with pytest.raises(Exception):
        est.refit_final()

    # With cached values, a new final model can be swapped in and refit.
    est.fit(y, T, X=X, W=W, cache_values=True)
    est.model_final = DebiasedLasso(fit_intercept=False)
    est.refit_final()
    assert isinstance(est.model_cate, DebiasedLasso)
    est.effect_interval(query)

    # The same holds for swapping in a featurizer.
    est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    est.refit_final()
    assert isinstance(est.featurizer_, PolynomialFeatures)
    est.effect_interval(query)

    # Reconfigure for a discrete treatment; assignments are applied in the
    # listed order, then the estimator is fit again from scratch.
    reconfiguration = (
        ("discrete_treatment", True),
        ("featurizer", None),
        ("linear_first_stages", True),
        ("model_t", LogisticRegression()),
        ("model_final", DebiasedLasso()),
    )
    for attribute, value in reconfiguration:
        setattr(est, attribute, value)
    est.fit(y, T, X=X, W=W)

    # Reference estimator built directly with the target configuration.
    fresh = NonParamDML(
        model_y=LinearRegression(),
        model_t=LogisticRegression(),
        model_final=DebiasedLasso(),
        discrete_treatment=True,
        random_state=123,
    )
    reference = fresh.fit(y, T, X=X, W=W)

    np.testing.assert_array_equal(est.effect(query), reference.effect(query))
    refit_first_bound = est.effect_interval(query)[0]
    reference_first_bound = reference.effect_interval(query)[0]
    np.testing.assert_array_equal(refit_first_bound, reference_first_bound)
def test_comparison(self):
    """Compare RScorer scores against the error of each estimator's effects.

    Several CATE estimators are fit on a training split. An ``RScorer`` fit
    on the validation split scores each of them, and those scores are
    compared with the root-mean-squared error between the true validation
    effects and each estimator's predicted effects. The test asserts that:

    * the slope of a linear regression of that error on the score is
      below 0.5;
    * the model chosen by ``scorer.best_model`` and the model returned by
      ``scorer.ensemble`` each have an error below
      ``1.5 * min(errors) + 0.05``.
    """
    def make_regressor():
        return LinearRegression()

    def make_classifier():
        return LogisticRegression()

    y, T, X, true_eff = self._get_data()
    split = train_test_split(X, T, y, true_eff, test_size=.4)
    (X_train, X_val, T_train, T_val,
     Y_train, Y_val, _, true_eff_val) = split

    def root_pehe(estimator):
        # Root-mean-squared gap between true and estimated validation effects.
        truth = true_eff_val.flatten()
        predicted = estimator.effect(X_val).flatten()
        return np.sqrt(np.mean((truth - predicted)**2))

    candidates = [
        ('ldml', LinearDML(
            model_y=make_regressor(),
            model_t=make_classifier(),
            discrete_treatment=True,
            linear_first_stages=False,
            cv=3)),
        ('sldml', SparseLinearDML(
            model_y=make_regressor(),
            model_t=make_classifier(),
            discrete_treatment=True,
            featurizer=PolynomialFeatures(degree=2, include_bias=False),
            linear_first_stages=False,
            cv=3)),
        ('xlearner', XLearner(
            models=make_regressor(),
            cate_models=make_regressor(),
            propensity_model=make_classifier())),
        ('dalearner', DomainAdaptationLearner(
            models=make_regressor(),
            final_models=make_regressor(),
            propensity_model=make_classifier())),
        ('slearner', SLearner(overall_model=make_regressor())),
        ('tlearner', TLearner(models=make_regressor())),
        ('drlearner', DRLearner(
            model_propensity=make_classifier(),
            model_regression=make_regressor(),
            model_final=make_regressor(),
            cv=3)),
        ('rlearner', NonParamDML(
            model_y=make_regressor(),
            model_t=make_classifier(),
            model_final=make_regressor(),
            discrete_treatment=True,
            cv=3)),
        ('dml3dlasso', DML(
            model_y=make_regressor(),
            model_t=make_classifier(),
            model_final=make_regressor(),
            discrete_treatment=True,
            featurizer=PolynomialFeatures(degree=3),
            linear_first_stages=False,
            cv=3)),
    ]

    # Fit every candidate on the training split; each result is unpacked
    # below as a (name, estimator) pair.
    runner = Parallel(n_jobs=1, verbose=1)
    fitted = runner(
        delayed(_fit_model)(label, est, Y_train, T_train, X_train)
        for label, est in candidates
    )
    estimators = [est for _, est in fitted]

    scorer = RScorer(
        model_y=make_regressor(),
        model_t=make_classifier(),
        discrete_treatment=True,
        cv=3,
        mc_iters=2,
        mc_agg='median',
    )
    scorer.fit(Y_val, T_val, X=X_val)

    # Score all estimators first, then compute all errors, in that order.
    rscore = [scorer.score(est) for est in estimators]
    rootpehe_score = [root_pehe(est) for est in estimators]

    # Regress error on score and bound the fitted slope.
    score_column = np.array(rscore).reshape(-1, 1)
    error_values = np.array(rootpehe_score)
    slope = LinearRegression().fit(score_column, error_values).coef_
    assert slope < 0.5

    tolerance = 1.5 * np.min(rootpehe_score) + 0.05

    # Each scorer call receives its own list, as in the original code.
    best, _ = scorer.best_model(list(estimators))
    rootpehe_best = root_pehe(best)
    assert rootpehe_best < tolerance

    ensemble, _ = scorer.ensemble(list(estimators))
    rootpehe_ensemble = root_pehe(ensemble)
    assert rootpehe_ensemble < tolerance