Example #1
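    # Draw simulated data from a factor data-generating process, stack the treated and control blocks, and run SC.estimate_effects on them.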
    def testFactorDGP(self):
        """
        factor DGP based test
        """
        N1, N0 = 2, 100
        treated_units = [0, 1]
        T0, T1 = 20, 10
        K, R, F = 5, 5, 5
        (
            Cov_control,
            Cov_treated,
            Out_pre_control,
            Out_pre_treated,
            Out_post_control,
            Out_post_treated,
            _,
            _,
        ) = factor_dgp(N0, N1, T0, T1, K, R, F)

        Cov = np.vstack((Cov_treated, Cov_control))
        Out_pre = np.vstack((Out_pre_treated, Out_pre_control))
        Out_post = np.vstack((Out_post_treated, Out_post_control))

        SC.estimate_effects(
            Out_pre,
            Out_post,
            treated_units,
            Cov,
            # constrain="simplex" -- handled by argparse now (passed in via command_line_options)
            **command_line_options,
        )
Example #2
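    # Smoke-test TestFitForErrors.run_test across all model types, then vary w_pen_inner and two match-space factories for the retrospective model.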
    def test_all(self):
        for model_type in ["retrospective", "prospective", "prospective-restricted", "full"]:
            TestFitForErrors.run_test(self, model_type)

        model_type = "retrospective"
        TestFitForErrors.run_test(self, model_type, w_pen_inner=True)  # default is w_pen_inner=False
        
        TestFitForErrors.run_test(self, model_type, match_space_maker=SparseSC.MTLassoCV_MatchSpace_factory()) 
        
        TestFitForErrors.run_test(self, model_type, match_space_maker=SparseSC.MTLassoMixed_MatchSpace_factory(v_pens=[1,2])) 
Example #3
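 # Smoke-test TestFitFastForErrors.run_test with each match-space factory for the retrospective model, then cover the remaining model types.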
 def test_all(self):
     for match_maker in [
             None,
             SparseSC.MTLassoMixed_MatchSpace_factory(),
             SparseSC.MTLassoCV_MatchSpace_factory(),
             SparseSC.MTLSTMMixed_MatchSpace_factory(),
             SparseSC.Fixed_V_factory(np.full(self.X.shape[1], 1))
     ]:
         TestFitFastForErrors.run_test(self, "retrospective", match_maker)
     for model_type in ["prospective", "prospective-restricted",
                        "full"]:  # "retrospective" tested above
         TestFitFastForErrors.run_test(self, model_type, None)
Example #4
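    # Smoke-test TestFitFastForErrors.run_test over the model types, then over the match-space factories and the w_pen_inner / avoid_NxN_mats / sc_Y_block_size options for the retrospective model.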
    def test_all(self):
        for model_type in ["prospective", "prospective-restricted", "full"]:  # "retrospective" tested below
            TestFitFastForErrors.run_test(self, model_type, None)

        model_type="retrospective"
        for match_maker in (None, SparseSC.MTLassoMixed_MatchSpace_factory(), SparseSC.MTLassoCV_MatchSpace_factory(), 
                            SparseSC.MTLSTMMixed_MatchSpace_factory(), SparseSC.D_LassoCV_MatchSpace_factory(), 
                            SparseSC.Fixed_V_factory(np.full(self.X.shape[1], 1))):
            TestFitFastForErrors.run_test(self, model_type, match_maker)

        TestFitFastForErrors.run_test(self, model_type, w_pen_inner=False)  # default is w_pen_inner=True
        TestFitFastForErrors.run_test(self, model_type, avoid_NxN_mats=True)  # default is avoid_NxN_mats=False
        TestFitFastForErrors.run_test(self, model_type, avoid_NxN_mats=True, sc_Y_block_size=2)  # also exercise sc_Y_block_size
Example #5
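    # Simulate a simple trend DGP in which half of the donors share the treated unit's trend, estimate effects, and check that the CI covers the true effect and the p-value is small.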
    def testSimpleTrendDGP(self):
        """
        No X, just Y; half the donors are great, other half are bad
        """
        N1, N0_sim, N0_not = 1, 50, 50
        N0 = N0_sim + N0_not
        N = N1 + N0
        treated_units, control_units = range(N1), range(N1, N)
        T0, T1 = 5, 2
        T = T0 + T1  # unused
        proto_sim = np.array([1, 2, 3, 4, 5] + [6, 7], ndmin=2)
        proto_not = np.array([0, 2, 4, 6, 8] + [10, 12], ndmin=2)
        te = 2
        proto_tr = proto_sim + np.hstack((np.zeros(
            (1, T0)), np.full((1, T1), te)))
        Y1 = np.matmul(np.ones((N1, 1)), proto_tr)
        Y0_sim = np.matmul(np.ones((N0_sim, 1)), proto_sim)
        Y0_sim = Y0_sim + np.random.normal(0, 0.1, Y0_sim.shape)
        #Y0_sim = Y0_sim + np.hstack((np.zeros((N0_sim,1)),
        #                             np.random.normal(0,0.1,(N0_sim,1)),
        #                             np.zeros((N0_sim,T-2))))
        Y0_not = np.matmul(np.ones((N0_not, 1)), proto_not)
        Y0_not = Y0_not + np.random.normal(0, 0.1, Y0_not.shape)
        Y = np.vstack((Y1, Y0_sim, Y0_not))

        unit_treatment_periods = np.full((N), -1)
        unit_treatment_periods[0] = T0

        # Y += np.random.normal(0, 0.01, Y.shape)

        # OPTIMIZE OVER THE V_PEN'S
        # for v_pen, w_pen in [(1,1), (1,1e-10), (1e-10,1e-10), (1e-10,1), (None, None)]: #
        # print("\nv_pen=%s, w_pen=%s" % (v_pen, w_pen))
        ret = SC.estimate_effects(
            Y,
            unit_treatment_periods,
            ret_CI=True,
            max_n_pl=200,
            fast=True,
            #stopping_rule=4,
            **command_line_options,
        )
        TestDGPs.simple_summ(ret.fits[T0], Y)
        V_penalty = ret.fits[T0].fitted_v_pen

        Y_sc = ret.fits[T0].predict(Y)  # [control_units, :]
        te_vec_est = (Y - Y_sc)[0, T0:]  # treated unit's post-period gap (estimated effect path)
        # weight_sums = np.sum(ret.fit.sc_weights, axis=1)

        # print(ret.fit.scores)
        p_value = ret.p_value
        #print("p-value: %s" % p_value)
        #print( ret.CI)
        #print(np.diag(ret.fit.V))
        #import pdb; pdb.set_trace()
        # print(ret)
        assert te in ret.CI, "Confidence interval does not include the true effect"
        assert p_value is not None
        assert p_value < 0.1, "P-value is larger than expected"
Example #6
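    # Run SC.estimate_effects on the fixture data, optionally converting X and Y to pandas DataFrames with a date index on the outcome columns.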
    def run_test(
            cls,
            obj,
            model_type="retrospective",
            frame_type="ndarray"):  # alternatives: "NDFrame", "timeindex"
        X = obj.X
        Y = obj.Y
        unit_treatment_periods = obj.unit_treatment_periods
        if frame_type == "NDFrame" or frame_type == "timeindex":
            X = pd.DataFrame(X)
            Y = pd.DataFrame(Y)
            if frame_type == "timeindex":
                t_index = pd.Index(
                    np.datetime64('2000-01-01', 'D') + range(Y.shape[1]))
                unit_treatment_periods = pd.Series(np.datetime64('NaT'),
                                                   index=Y.index)
                unit_treatment_periods[0] = t_index[7]
                unit_treatment_periods[1] = t_index[8]
                Y.columns = t_index

        SC.estimate_effects(covariates=X,
                            outcomes=Y,
                            model_type=model_type,
                            unit_treatment_periods=unit_treatment_periods)
Example #7
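 # Objective for penalty tuning: evaluate SC.CV_score along a one-dimensional path that scales the L1 penalty up and the L2 penalty down by the same factor exp(x[0]).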
 def L1_L2_const_obj_func(x):
     n_calls[0] += 1
     t1 = time.time()
     score = SC.CV_score(X = X_control,
                         Y = Y_pre_control,
                         X_treat = X_treated,
                         Y_treat = Y_pre_treated,
                         # if v_pen is a single value we get a single score; if it's an array of values, we get an array of scores.
                         v_pen = best_L1_penalty_ct * np.exp(x[0]),
                         w_pen = w_pen_start_ct / np.exp(x[0]),
                         # suppress the analysis type message
                         quiet = True)
     t2 = time.time()
     temp_results.append((n_calls[0], x, score))
     print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], score, 1 - score / SS ))
     #print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS ))
     return score
Example #8
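# Test-module preamble: imports, a guard against a missing SparseSC dev install, and seeded random state for reproducible fixtures.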
from scipy.optimize.linesearch import LineSearchWarning
import numpy as np
import traceback
import random
import unittest

try:
    import SparseSC
    from SparseSC.fit import fit
    from SparseSC.fit_fast import fit_fast
except ImportError:
    raise RuntimeError(
        "SparseSC is not installed. Use 'pip install -e .' or 'conda develop .' from repo root to install in dev mode"
    )
#import warnings
#warnings.simplefilter("error")

SparseSC.keras_reproducible()  # for when I start testing for correctness

# pylint: disable=missing-docstring


class TestFitForErrors(unittest.TestCase):
    def setUp(self):

        random.seed(12345)
        np.random.seed(101101001)
        control_units = 50
        treated_units = 20
        features = 10
        targets = 5

        self.X = np.random.rand(control_units + treated_units, features)
Example #9
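# Build a combined covariate matrix from pre-period averages and pre-period outcomes, then fit SC.fit_fast and SC.fit on the same data for comparison.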
#                       'cigsale_88':smoking_df.xs(1988, level='year')["cigsale"]})
#X_orig = pd.concat((X_avgs, X_spot), axis=1)
#X_orig.isnull().sum().sum() #0

X_full = pd.concat((X_avgs, beer_pre.unstack('year'), Xother_pre.unstack('year')), axis=1)
X_full_names = [c[0] + "(" + str(c[1]) + ")" if len(c)==2 else c for c in X_full.columns]
X_full.isnull().sum().sum() #0
X_full = X_full.values
X_Y_pre = np.concatenate((X_full, Y_pre), axis=1)
X_Y_pre_names = X_full_names + Y_pre_names
X_Y_pre_names_arr = np.array(X_Y_pre_names)


# Fast  ----------------------#

fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
#print(len(np.diag(fast_fit.V)))
#print(np.diag(fast_fit.V))
#Y_post_sc = fast_fit.predict(Y_post)
#Y_pre_sc = fast_fit.predict(Y_pre)
#post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
#pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
#print(pre_mse) #192.210632448
#print(post_mse) #129.190437803
#print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])

# Full  ----------------------#

full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
print(np.diag(full_fit.V))
print(np.diag(full_fit.V)[np.diag(full_fit.V)>0])
Example #10
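    # Build leave-one-out design matrices and compute default penalty starting points: the L2 penalty via SC.w_pen_guestimate and the maximum L1 penalty via SC.get_max_v_pen.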
    # in the leave-one-out scenario, the pre-treatment outcomes will be part of the covariates
    X_and_Y_pre = np.hstack((X, Y_pre))
    X_and_Y_pre_control = np.hstack((X_control, Y_pre_control))

    # IDENTIFIERS FOR TREAT AND CONTROL UNITS
    # control_units = np.arange( N0 * groups )
    # treated_units = np.arange( N1 * groups ) + N0

    # ------------------------------------------------------------
    # ------------------------------------------------------------
    # find default penalties
    # ------------------------------------------------------------
    # ------------------------------------------------------------

    # get starting point for the L2 penalty 
    w_pen_start_ct  = SC.w_pen_guestimate(X_control)
    w_pen_start_loo = SC.w_pen_guestimate(X_and_Y_pre_control)

    # get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
    L1_max_ct  = SC.get_max_v_pen(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
    if False:
        L1_max_loo = SC.get_max_v_pen(X_and_Y_pre_control[np.arange(100)],Y_post[np.arange(100)])
        print("Max L1 loo %s " % L1_max_loo)
    else:
        L1_max_loo = 147975295.9121998

    if False:
        # Demonstrate relations between the L1 and L2 penalties

        # get the maximum value for the L1 Penalty parameter conditional on several L2 penalty parameter values
        L2_grid = (2.** np.arange(-1,2))