Python CausalAnalysis.whatif 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: econml.solutions.causal_analysis

클래스/타입: CausalAnalysis

메소드/함수: whatif

hotexamples.com에서의 예제들: 6

Python CausalAnalysis.whatif - 6개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python econml.solutions.causal_analysis.CausalAnalysis.whatif의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

CausalAnalysis(22)

fit(21)

global_causal_effect(18)

local_causal_effect(15)

_policy_tree_output(12)

_cohort_causal_effect_dict(10)

cohort_causal_effect(10)

_global_causal_effect_dict(10)

_local_causal_effect_dict(10)

_heterogeneity_tree_output(7)

_whatif_dict(6)

whatif(6)

_policy_tree_string(5)

_heterogeneity_tree_string(5)

individualized_policy(3)

_individualized_policy_dict(2)

upper_bound_on_cat_expansion(2)

feature_inds(1)

skip_cat_limit_checks(1)

typical_treatment_value(1)

예제 #1

파일 보기

파일: test_causal_analysis.py 프로젝트: ScriptBox21/MS-EconML

    def test_final_models(self):
        """Smoke-test CausalAnalysis with each final heterogeneity model.

        Fits on random data for every combination of heterogeneity model
        ('forest', 'linear') and classification flag, calls the effect, tree
        and what-if entry points, and finally checks that an unrecognised
        heterogeneity model leads to an AssertionError.
        """
        outcome_dims = (1, )
        y = np.random.choice([0, 1], size=(500, ) + outcome_dims)
        # two continuous columns, one binary column, one three-valued column
        continuous = np.random.normal(size=(500, 2))
        binary = np.random.choice([0, 1], size=(500, 1))
        ternary = np.random.choice([0, 1, 2], size=(500, 1))
        X = np.hstack((continuous, binary, ternary))
        inds = [0, 1, 2, 3]
        cats = [2, 3]
        hinds = [0, 3]
        for h_model in ['forest', 'linear']:
            for classification in [False, True]:
                ca = CausalAnalysis(inds,
                                    cats,
                                    hinds,
                                    heterogeneity_model=h_model,
                                    classification=classification)
                ca.fit(X, y)

                # these calls only need to complete; their results are not
                # inspected by this test
                ca.global_causal_effect()
                ca.cohort_causal_effect(X[:2])
                ca.local_causal_effect(X[:2])
                ca._global_causal_effect_dict()
                ca._cohort_causal_effect_dict(X[:2])
                ca._local_causal_effect_dict(X[:2])

                ca._policy_tree_output(X, 1)
                for feature in (1, 3):
                    ca._heterogeneity_tree_string(X, feature)

                # the policy tree is expected to fail an assertion for
                # feature 3 (multi-dimensional treatment)
                with self.assertRaises(AssertionError):
                    ca._policy_tree_output(X, 3)

                # the what-if block is expected to raise when classification
                # is on; otherwise ExitStack() serves as a no-op context
                cm = (self.assertRaises(Exception)
                      if classification else ExitStack())
                with cm:
                    for feature in (1, 2):
                        inf = ca.whatif(X[:2], np.ones(shape=(2, )), feature,
                                        y[:2])
                        inf.summary_frame()

                    ca._whatif_dict(X[:2], np.ones(shape=(2, )), 1, y[:2])

        # 'other' is expected to be rejected as a heterogeneity model;
        # `classification` still holds the last value from the loops above
        with self.assertRaises(AssertionError):
            ca = CausalAnalysis(inds,
                                cats,
                                hinds,
                                heterogeneity_model='other',
                                classification=classification)
            ca.fit(X, y)

예제 #2

파일 보기

파일: test_causal_analysis.py 프로젝트: FoundryAI/EconML

    def test_final_models(self):
        """Smoke-test CausalAnalysis with each final heterogeneity model.

        For every combination of heterogeneity model ('forest', 'linear') and
        classification flag, fits on random data, calls the effect and tree
        entry points, checks the shape of the policy tree output for each
        feature, and exercises the what-if API. Finally checks that an
        unrecognised heterogeneity model leads to an AssertionError.
        """
        outcome_dims = (1, )
        y = np.random.choice([0, 1], size=(500, ) + outcome_dims)
        # two continuous columns, one binary column, one three-valued column
        continuous = np.random.normal(size=(500, 2))
        binary = np.random.choice([0, 1], size=(500, 1))
        ternary = np.random.choice([0, 1, 2], size=(500, 1))
        X = np.hstack((continuous, binary, ternary))
        inds = [0, 1, 2, 3]
        cats = [2, 3]
        hinds = [0, 3]
        for h_model in ['forest', 'linear']:
            for classification in [False, True]:
                ca = CausalAnalysis(inds,
                                    cats,
                                    hinds,
                                    heterogeneity_model=h_model,
                                    classification=classification)
                ca.fit(X, y)

                # these calls only need to complete; their results are not
                # inspected by this test
                ca.global_causal_effect()
                ca.cohort_causal_effect(X[:2])
                ca.local_causal_effect(X[:2])
                ca._global_causal_effect_dict()
                ca._cohort_causal_effect_dict(X[:2])
                ca._local_causal_effect_dict(X[:2])

                ca._policy_tree_output(X, 1)
                for feature in (1, 3):
                    ca._heterogeneity_tree_output(X, feature)

                # Continuous, binary and multi-class treatments must all be
                # handled; a multi-class treatment yields one "always treat"
                # value per non-default level
                expected_lengths = [(0, 1), (1, 1), (2, 1), (3, 2)]
                for feature, n_always in expected_lengths:
                    output = ca._policy_tree_output(X, feature)
                    _, policy_val, always_trt = output
                    assert isinstance(always_trt, list)
                    assert np.array(policy_val).shape == ()
                    assert np.array(always_trt).shape == (n_always, )

                    # the policy value should be at least as good as always
                    # applying any single treatment
                    assert_less_close(always_trt, policy_val)

                # the what-if block is expected to raise when classification
                # is on; otherwise ExitStack() serves as a no-op context
                cm = (self.assertRaises(Exception)
                      if classification else ExitStack())
                with cm:
                    for feature in (1, 2):
                        ca.whatif(X[:2], np.ones(shape=(2, )), feature, y[:2])
                    ca._whatif_dict(X[:2], np.ones(shape=(2, )), 1, y[:2])

        # 'other' is expected to be rejected as a heterogeneity model;
        # `classification` still holds the last value from the loops above
        with self.assertRaises(AssertionError):
            ca = CausalAnalysis(inds,
                                cats,
                                hinds,
                                heterogeneity_model='other',
                                classification=classification)
            ca.fit(X, y)

예제 #3

파일 보기

파일: test_causal_analysis.py 프로젝트: ScriptBox21/MS-EconML

    def test_basic_pandas(self):
        """Exercise CausalAnalysis on pandas inputs.

        Runs with and without classification, addressing features first by
        position and then by column name. Checks the index structure of the
        global/cohort/local effect frames, the shapes of the dictionary
        outputs, the tree helpers and the what-if API, and finally that
        invalid heterogeneity specifications raise.
        """
        for classification in [False, True]:
            y = pd.Series(np.random.choice([0, 1], size=(500, )))
            columns = {
                'a': np.random.normal(size=500),
                'b': np.random.normal(size=500),
                'c': np.random.choice([0, 1], size=500),
                'd': np.random.choice(['a', 'b', 'c'], size=500)
            }
            X = pd.DataFrame(columns)

            # the same configuration expressed by position and by column name
            n_inds, t_inds = [0, 1, 2, 3], ['a', 'b', 'c', 'd']
            n_cats, t_cats = [2, 3], ['c', 'd']
            n_hinds, t_hinds = [0, 3], ['a', 'd']
            configurations = [(n_inds, n_cats, n_hinds),
                              (t_inds, t_cats, t_hinds)]
            for inds, cats, hinds in configurations:
                ca = CausalAnalysis(inds,
                                    cats,
                                    hinds,
                                    classification=classification)
                ca.fit(X, y)
                glo = ca.global_causal_effect()
                coh = ca.cohort_causal_effect(X[:2])
                loc = ca.local_causal_effect(X[:2])

                # global and cohort frames share one index layout
                assert glo.index.equals(coh.index)

                # two rows were passed in, so the local frame holds the
                # global entries twice over
                assert len(loc.index) == 2 * len(glo.index)

                assert glo.index.names == ['feature', 'feature_value']
                assert loc.index.names == ['sample'] + glo.index.names

                # expected feature order; a categorical feature shows up
                # (#categories - 1) times
                fts = ['a', 'b', 'c', 'd', 'd']

                for pos, name in enumerate(fts):
                    assert name == glo.index[pos][0] == loc.index[pos][
                        1] == loc.index[len(fts) + pos][1]

                point_key = _CausalInsightsConstants.PointEstimateKey
                glo_point_est = np.array(
                    ca._global_causal_effect_dict()[point_key])
                coh_point_est = np.array(
                    ca._cohort_causal_effect_dict(X[:2])[point_key])
                loc_point_est = np.array(
                    ca._local_causal_effect_dict(X[:2])[point_key])

                # global shape is (d_y, sum(d_t))
                assert glo_point_est.shape == coh_point_est.shape == (1, 5)
                assert loc_point_est.shape == (2, ) + glo_point_est.shape

                ca._policy_tree_output(X, inds[1])
                for feature in (inds[1], inds[3]):
                    ca._heterogeneity_tree_string(X, feature)

                # the policy tree is expected to fail an assertion for the
                # last feature (multi-dimensional treatment)
                with self.assertRaises(AssertionError):
                    ca._policy_tree_output(X, inds[3])

                # the what-if block is expected to raise when classification
                # is on; otherwise ExitStack() serves as a no-op context
                cm = (self.assertRaises(Exception)
                      if classification else ExitStack())
                with cm:
                    for feature in (inds[1], inds[2]):
                        inf = ca.whatif(X[:2], np.ones(shape=(2, )), feature,
                                        y[:2])
                        assert np.shape(inf.point_estimate) == np.shape(y[:2])
                        inf.summary_frame()

                    ca._whatif_dict(X[:2], np.ones(shape=(2, )), inds[1],
                                    y[:2])

            # heterogeneity specs that should be rejected: a position past
            # the last column, and a name that is not a column
            for bad_hinds in ([4], ["test"]):
                with self.assertRaises(Exception):
                    ca = CausalAnalysis(n_inds, n_cats, bad_hinds)
                    ca.fit(X, y)

예제 #4

파일 보기

파일: test_causal_analysis.py 프로젝트: ScriptBox21/MS-EconML

    def test_automl_first_stage(self):
        """Exercise CausalAnalysis with nuisance_models='automl'.

        Runs with and without classification on random array data. Checks
        the index structure of the effect frames, the shapes of the
        dictionary outputs, the tree helpers and the what-if API, and
        finally that invalid heterogeneity specifications raise.
        """
        outcome_dims = (1, )
        for classification in [False, True]:
            y = np.random.choice([0, 1], size=(500, ) + outcome_dims)
            # two continuous columns, one binary, one three-valued column
            continuous = np.random.normal(size=(500, 2))
            binary = np.random.choice([0, 1], size=(500, 1))
            ternary = np.random.choice([0, 1, 2], size=(500, 1))
            X = np.hstack((continuous, binary, ternary))
            inds = [0, 1, 2, 3]
            cats = [2, 3]
            hinds = [0, 3]
            ca = CausalAnalysis(inds,
                                cats,
                                hinds,
                                nuisance_models='automl',
                                classification=classification)
            ca.fit(X, y)
            glo = ca.global_causal_effect()
            coh = ca.cohort_causal_effect(X[:2])
            loc = ca.local_causal_effect(X[:2])

            # global and cohort frames share one index layout
            assert glo.index.equals(coh.index)

            # two rows were passed in, so the local frame holds the global
            # entries twice over
            assert len(loc.index) == 2 * len(glo.index)

            assert glo.index.names == ['feature', 'feature_value']
            assert loc.index.names == ['sample'] + glo.index.names

            point_key = _CausalInsightsConstants.PointEstimateKey
            glo_point_est = np.array(
                ca._global_causal_effect_dict()[point_key])
            coh_point_est = np.array(
                ca._cohort_causal_effect_dict(X[:2])[point_key])
            loc_point_est = np.array(
                ca._local_causal_effect_dict(X[:2])[point_key])

            ca._policy_tree_output(X, 1)
            for feature in (1, 3):
                ca._heterogeneity_tree_string(X, feature)

            # the policy tree is expected to fail an assertion for feature 3
            # (multi-dimensional treatment)
            with self.assertRaises(AssertionError):
                ca._policy_tree_output(X, 3)

            # global shape is (d_y, sum(d_t))
            assert glo_point_est.shape == coh_point_est.shape == (1, 5)
            assert loc_point_est.shape == (2, ) + glo_point_est.shape

            # the what-if block is expected to raise when classification is
            # on; otherwise ExitStack() serves as a no-op context
            cm = (self.assertRaises(Exception)
                  if classification else ExitStack())
            with cm:
                for feature in (1, 2):
                    inf = ca.whatif(X[:2], np.ones(shape=(2, )), feature,
                                    y[:2])
                    assert np.shape(inf.point_estimate) == np.shape(y[:2])
                    inf.summary_frame()

                ca._whatif_dict(X[:2], np.ones(shape=(2, )), 1, y[:2])

            # expected feature order; a categorical feature shows up
            # (#categories - 1) times
            fts = ['x0', 'x1', 'x2', 'x3', 'x3']

            for pos, name in enumerate(fts):
                assert name == glo.index[pos][0] == loc.index[pos][
                    1] == loc.index[len(fts) + pos][1]

            # heterogeneity specs that should be rejected: a position past
            # the last column, and a string where positions are in use
            for bad_hinds in ([4], ["test"]):
                with self.assertRaises(Exception):
                    ca = CausalAnalysis(inds, cats, bad_hinds)
                    ca.fit(X, y)

예제 #5

파일 보기

파일: test_causal_analysis.py 프로젝트: FoundryAI/EconML

    def test_basic_array(self):
        """Exercise CausalAnalysis on plain numpy inputs.

        Runs for outcome shapes (n,) and (n, 1), with and without
        classification. Checks the index structure of the effect frames, the
        shapes of the dictionary outputs, the policy tree output for every
        feature, the what-if API, and finally that invalid heterogeneity
        specifications raise.
        """
        for outcome_dims in [(), (1, )]:
            for classification in [False, True]:
                y = np.random.choice([0, 1], size=(500, ) + outcome_dims)
                # two continuous columns, one binary, one three-valued column
                continuous = np.random.normal(size=(500, 2))
                binary = np.random.choice([0, 1], size=(500, 1))
                ternary = np.random.choice([0, 1, 2], size=(500, 1))
                X = np.hstack((continuous, binary, ternary))
                inds = [0, 1, 2, 3]
                cats = [2, 3]
                hinds = [0, 3]
                ca = CausalAnalysis(inds,
                                    cats,
                                    hinds,
                                    classification=classification)
                ca.fit(X, y)
                glo = ca.global_causal_effect()
                coh = ca.cohort_causal_effect(X[:2])
                loc = ca.local_causal_effect(X[:2])

                # global and cohort frames share one index layout
                assert glo.index.equals(coh.index)

                # two rows were passed in, so the local frame holds the
                # global entries twice over
                assert len(loc.index) == 2 * len(glo.index)

                assert glo.index.names == ['feature', 'feature_value']
                assert loc.index.names == ['sample'] + glo.index.names

                point_key = _CausalInsightsConstants.PointEstimateKey
                glo_point_est = np.array(
                    ca._global_causal_effect_dict()[point_key])
                coh_point_est = np.array(
                    ca._cohort_causal_effect_dict(X[:2])[point_key])
                loc_point_est = np.array(
                    ca._local_causal_effect_dict(X[:2])[point_key])

                for feature in (1, 3):
                    ca._heterogeneity_tree_output(X, feature)

                # Continuous, binary and multi-class treatments must all be
                # handled; a multi-class treatment yields one "always treat"
                # value per non-default level
                expected_lengths = [(0, 1), (1, 1), (2, 1), (3, 2)]
                for feature, n_always in expected_lengths:
                    output = ca._policy_tree_output(X, feature)
                    _, policy_val, always_trt = output
                    assert isinstance(always_trt, list)
                    assert np.array(policy_val).shape == ()
                    assert np.array(always_trt).shape == (n_always, )

                    # the policy value should be at least as good as always
                    # applying any single treatment
                    assert_less_close(always_trt, policy_val)

                # global shape is (d_y, sum(d_t))
                assert glo_point_est.shape == coh_point_est.shape == (1, 5)
                assert loc_point_est.shape == (2, ) + glo_point_est.shape

                # the what-if block is expected to raise when classification
                # is on; otherwise ExitStack() serves as a no-op context
                cm = (self.assertRaises(Exception)
                      if classification else ExitStack())
                with cm:
                    for feature in (1, 2):
                        inf = ca.whatif(X[:2], np.ones(shape=(2, )), feature,
                                        y[:2])
                        assert np.shape(inf.point_estimate) == (2, )

                    ca._whatif_dict(X[:2], np.ones(shape=(2, )), 1, y[:2])

                # expected feature order; a categorical feature shows up
                # (#categories - 1) times
                fts = ['x0', 'x1', 'x2', 'x3', 'x3']

                for pos, name in enumerate(fts):
                    assert name == glo.index[pos][0] == loc.index[pos][
                        1] == loc.index[len(fts) + pos][1]

                # heterogeneity specs that should be rejected: a position
                # past the last column, and a string where positions are used
                for bad_hinds in ([4], ["test"]):
                    with self.assertRaises(Exception):
                        ca = CausalAnalysis(inds, cats, bad_hinds)
                        ca.fit(X, y)

예제 #6

파일 보기

    def test_basic_pandas(self):
        """Exercise CausalAnalysis on pandas inputs, including row-wise dicts.

        Runs with and without classification, addressing features first by
        position and then by column name. Checks the index structure of the
        effect frames, the shapes and row counts of the dictionary outputs,
        typical treatment values, the policy tree output for every feature,
        the what-if API, and finally that invalid heterogeneity
        specifications raise.
        """
        for classification in [False, True]:
            y = pd.Series(np.random.choice([0, 1], size=(500, )))
            columns = {
                'a': np.random.normal(size=500),
                'b': np.random.normal(size=500),
                'c': np.random.choice([0, 1], size=500),
                'd': np.random.choice(['a', 'b', 'c'], size=500)
            }
            X = pd.DataFrame(columns)

            # the same configuration expressed by position and by column name
            n_inds, t_inds = [0, 1, 2, 3], ['a', 'b', 'c', 'd']
            n_cats, t_cats = [2, 3], ['c', 'd']
            n_hinds, t_hinds = [0, 3], ['a', 'd']
            configurations = [(n_inds, n_cats, n_hinds),
                              (t_inds, t_cats, t_hinds)]
            for inds, cats, hinds in configurations:
                ca = CausalAnalysis(inds,
                                    cats,
                                    hinds,
                                    classification=classification)
                ca.fit(X, y)
                glo = ca.global_causal_effect()
                coh = ca.cohort_causal_effect(X[:2])
                loc = ca.local_causal_effect(X[:2])

                # global and cohort frames share one index layout
                assert glo.index.equals(coh.index)

                # two rows were passed in, so the local frame holds the
                # global entries twice over
                assert len(loc.index) == 2 * len(glo.index)

                assert glo.index.names == ['feature', 'feature_value']
                assert loc.index.names == ['sample'] + glo.index.names

                # expected feature order; a categorical feature shows up
                # (#categories - 1) times
                fts = ['a', 'b', 'c', 'd', 'd']

                for pos, name in enumerate(fts):
                    assert name == glo.index[pos][0] == loc.index[pos][
                        1] == loc.index[len(fts) + pos][1]

                # each dictionary output is requested in its default layout
                # and again in the row-wise layout
                glo_dict = ca._global_causal_effect_dict()
                glo_rows = ca._global_causal_effect_dict(row_wise=True)

                coh_dict = ca._cohort_causal_effect_dict(X[:2])
                coh_rows = ca._cohort_causal_effect_dict(X[:2], row_wise=True)

                loc_dict = ca._local_causal_effect_dict(X[:2])
                loc_rows = ca._local_causal_effect_dict(X[:2], row_wise=True)

                point_key = _CausalInsightsConstants.PointEstimateKey
                glo_point_est = np.array(glo_dict[point_key])
                coh_point_est = np.array(coh_dict[point_key])
                loc_point_est = np.array(loc_dict[point_key])

                # global shape is (d_y, sum(d_t))
                assert glo_point_est.shape == coh_point_est.shape == (1, 5)
                assert loc_point_est.shape == (2, ) + glo_point_est.shape

                # global and cohort row-wise dicts have d_y * d_t entries
                assert len(glo_rows[_CausalInsightsConstants.RowData]) == len(
                    coh_rows[_CausalInsightsConstants.RowData]) == 5
                # the local row-wise dict is flattened to n_rows * d_y * d_t
                assert len(loc_rows[_CausalInsightsConstants.RowData]) == 10

                ca._policy_tree_output(X, inds[1])
                for feature in (inds[1], inds[3]):
                    ca._heterogeneity_tree_output(X, feature)

                # for the continuous features 'a' and 'b', the typical
                # treatment value is compared to the mean absolute value of
                # the column
                for feature, column in ((inds[0], 'a'), (inds[1], 'b')):
                    np.testing.assert_allclose(
                        ca.typical_treatment_value(feature),
                        np.mean(np.abs(X[column])))
                # discrete treatments have typical treatment value 1
                assert ca.typical_treatment_value(
                    inds[2]) == ca.typical_treatment_value(inds[3]) == 1

                # Continuous, binary and multi-class treatments must all be
                # handled; a multi-class treatment yields one "always treat"
                # value per non-default level
                expected_lengths = [(0, 1), (1, 1), (2, 1), (3, 2)]
                for idx, n_always in expected_lengths:
                    tree_out = ca._policy_tree_output(X, inds[idx])
                    policy_val = tree_out.policy_value
                    always_trt = tree_out.always_treat
                    assert isinstance(tree_out.control_name, str)
                    assert isinstance(always_trt, dict)
                    assert np.array(policy_val).shape == ()
                    assert len(always_trt) == n_always
                    # every "always treat" value is a scalar
                    assert all(
                        np.array(val).shape == ()
                        for val in always_trt.values())

                    # the policy value should be at least as good as always
                    # applying any single treatment
                    assert_less_close(np.array(list(always_trt.values())),
                                      policy_val)

                # the what-if block is expected to raise when classification
                # is on; otherwise ExitStack() serves as a no-op context
                cm = (self.assertRaises(Exception)
                      if classification else ExitStack())
                with cm:
                    for feature in (inds[1], inds[2]):
                        inf = ca.whatif(X[:2], np.ones(shape=(2, )), feature,
                                        y[:2])
                        assert np.shape(inf.point_estimate) == np.shape(y[:2])

                    ca._whatif_dict(X[:2], np.ones(shape=(2, )), inds[1],
                                    y[:2])
                    ca._whatif_dict(X[:2],
                                    np.ones(shape=(2, )),
                                    inds[1],
                                    y[:2],
                                    row_wise=True)

            # heterogeneity specs that should be rejected: a position past
            # the last column, and a name that is not a column
            for bad_hinds in ([4], ["test"]):
                with self.assertRaises(Exception):
                    ca = CausalAnalysis(n_inds, n_cats, bad_hinds)
                    ca.fit(X, y)