def testFactorDGP(self):
    """ factor dgp based test """
    N1, N0 = 2, 100
    treated_units = [0, 1]
    T0, T1 = 20, 10
    K, R, F = 5, 5, 5
    (
        Cov_control,
        Cov_treated,
        Out_pre_control,
        Out_pre_treated,
        Out_post_control,
        Out_post_treated,
        _,
        _,
    ) = factor_dgp(N0, N1, T0, T1, K, R, F)

    Cov = np.vstack((Cov_treated, Cov_control))
    Out_pre = np.vstack((Out_pre_treated, Out_pre_control))
    Out_post = np.vstack((Out_post_treated, Out_post_control))

    SC.estimate_effects(
        Out_pre,
        Out_post,
        treated_units,
        Cov,
        # constrain="simplex", -- handled by argparse now
        **command_line_options,
    )
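# A minimal hedged sketch of inspecting the returned estimate; the attribute
# names mirror testSimpleTrendDGP below and are assumptions for this DGP:
#
#     ret = SC.estimate_effects(Out_pre, Out_post, treated_units, Cov)
#     print(ret.p_value)  # placebo-based p-value
#     print(ret.CI)       # confidence interval for the estimated effect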
def test_all(self): for model_type in ["retrospective", "prospective", "prospective-restricted", "full"]: # TestFitForErrors.run_test(self, model_type) model_type = "retrospective" TestFitForErrors.run_test(self, model_type, w_pen_inner=True) #default is, w_pen_inner=False TestFitForErrors.run_test(self, model_type, match_space_maker=SparseSC.MTLassoCV_MatchSpace_factory()) TestFitForErrors.run_test(self, model_type, match_space_maker=SparseSC.MTLassoMixed_MatchSpace_factory(v_pens=[1,2]))
def test_all(self):
    for match_maker in [
        None,
        SparseSC.MTLassoMixed_MatchSpace_factory(),
        SparseSC.MTLassoCV_MatchSpace_factory(),
        SparseSC.MTLSTMMixed_MatchSpace_factory(),
        SparseSC.Fixed_V_factory(np.full(self.X.shape[1], 1)),
    ]:
        TestFitFastForErrors.run_test(self, "retrospective", match_maker)
    for model_type in ["prospective", "prospective-restricted", "full"]:  # "retrospective" is tested above
        TestFitFastForErrors.run_test(self, model_type, None)
def test_all(self): for model_type in ["prospective", "prospective-restricted", "full"]: #"retrospective", (tested below) TestFitFastForErrors.run_test(self, model_type, None) model_type="retrospective" for match_maker in (None, SparseSC.MTLassoMixed_MatchSpace_factory(), SparseSC.MTLassoCV_MatchSpace_factory(), SparseSC.MTLSTMMixed_MatchSpace_factory(), SparseSC.D_LassoCV_MatchSpace_factory(), SparseSC.Fixed_V_factory(np.full(self.X.shape[1], 1))): #, TestFitFastForErrors.run_test(self, model_type, match_maker) TestFitFastForErrors.run_test(self, model_type, w_pen_inner=False) #default is, w_pen_inner=True TestFitFastForErrors.run_test(self, model_type, avoid_NxN_mats=True) #default is avoid_NxN_mats=False TestFitFastForErrors.run_test(self, model_type, avoid_NxN_mats=True, sc_Y_block_size=2) #default is avoid_NxN_mats=False
def testSimpleTrendDGP(self):
    """ No X, just Y; half the donors are great, the other half are bad """
    N1, N0_sim, N0_not = 1, 50, 50
    N0 = N0_sim + N0_not
    N = N1 + N0
    treated_units, control_units = range(N1), range(N1, N)
    T0, T1 = 5, 2
    T = T0 + T1  # unused
    proto_sim = np.array([1, 2, 3, 4, 5] + [6, 7], ndmin=2)
    proto_not = np.array([0, 2, 4, 6, 8] + [10, 12], ndmin=2)
    te = 2
    proto_tr = proto_sim + np.hstack((np.zeros((1, T0)), np.full((1, T1), te)))
    Y1 = np.matmul(np.ones((N1, 1)), proto_tr)
    Y0_sim = np.matmul(np.ones((N0_sim, 1)), proto_sim)
    Y0_sim = Y0_sim + np.random.normal(0, 0.1, Y0_sim.shape)
    Y0_not = np.matmul(np.ones((N0_not, 1)), proto_not)
    Y0_not = Y0_not + np.random.normal(0, 0.1, Y0_not.shape)
    Y = np.vstack((Y1, Y0_sim, Y0_not))

    unit_treatment_periods = np.full(N, -1)
    unit_treatment_periods[0] = T0

    # To optimize over the penalties, sweep (v_pen, w_pen) pairs such as
    # (1, 1), (1, 1e-10), (1e-10, 1e-10), (1e-10, 1), (None, None) here.
    ret = SC.estimate_effects(
        Y,
        unit_treatment_periods,
        ret_CI=True,
        max_n_pl=200,
        fast=True,
        **command_line_options,
    )
    TestDGPs.simple_summ(ret.fits[T0], Y)
    V_penalty = ret.fits[T0].fitted_v_pen
    Y_sc = ret.fits[T0].predict(Y)
    # estimated effect for the treated unit over the post-period
    te_vec_est = (Y - Y_sc)[0, T0:]
    p_value = ret.p_value
    assert te in ret.CI, "Confidence interval does not include the true effect"
    assert p_value is not None
    assert p_value < 0.1, "P-value is larger than expected"
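# Worked check of the prototype construction above (T0=5, T1=2, te=2):
# proto_sim is 1x7 and the hstack term is [0, 0, 0, 0, 0, 2, 2], so proto_tr
# is [1, 2, 3, 4, 5, 8, 9] -- identical to proto_sim pre-treatment and
# shifted by te afterwards. The "sim" donors therefore track the treated
# unit's pre-trend (up to noise) while the "not" donors do not, which is
# what lets the confidence interval recover te.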
@classmethod
def run_test(cls, obj, model_type="retrospective", frame_type="ndarray"):
    # frame_type is one of "ndarray", "NDFrame", "timeindex"
    X = obj.X
    Y = obj.Y
    unit_treatment_periods = obj.unit_treatment_periods
    if frame_type in ("NDFrame", "timeindex"):
        X = pd.DataFrame(X)
        Y = pd.DataFrame(Y)
        if frame_type == "timeindex":
            t_index = pd.Index(np.datetime64("2000-01-01", "D") + np.arange(Y.shape[1]))
            unit_treatment_periods = pd.Series(np.datetime64("NaT"), index=Y.index)
            unit_treatment_periods[0] = t_index[7]
            unit_treatment_periods[1] = t_index[8]
            Y.columns = t_index
    SC.estimate_effects(
        covariates=X,
        outcomes=Y,
        model_type=model_type,
        unit_treatment_periods=unit_treatment_periods,
    )
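# A hedged usage sketch; the owning test-class name is hypothetical, but the
# call pattern mirrors the test_all methods above:
#
#     TestEstimationForErrors.run_test(self, "retrospective", frame_type="timeindex")
#
# This exercises estimate_effects with pandas outcomes whose columns are
# np.datetime64 dates and with treatment periods given as timestamps.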
def L1_L2_const_obj_func(x):
    n_calls[0] += 1
    t1 = time.time()
    # if v_pen is a single value we get a single score; if it's an array of
    # values we get an array of scores.
    score = SC.CV_score(
        X=X_control,
        Y=Y_pre_control,
        X_treat=X_treated,
        Y_treat=Y_pre_treated,
        v_pen=best_L1_penalty_ct * np.exp(x[0]),
        w_pen=w_pen_start_ct / np.exp(x[0]),
        quiet=True,  # suppress the analysis-type message
    )
    t2 = time.time()
    temp_results.append((n_calls[0], x, score))
    print(
        "calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-squared: %s"
        % (n_calls[0], t2 - t1, x[0], score, 1 - score / SS)
    )
    return score
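# A hedged sketch of minimizing this objective: it is a scalar function of a
# length-1 vector x (a log-scale offset applied to both penalties), so any
# derivative-free optimizer applies. Nelder-Mead is an assumption here, not
# necessarily what the surrounding script uses.
from scipy.optimize import minimize

opt_res = minimize(L1_L2_const_obj_func, x0=np.zeros(1), method="Nelder-Mead")
# The implied penalties at the optimum are then:
#   v_pen = best_L1_penalty_ct * np.exp(opt_res.x[0])
#   w_pen = w_pen_start_ct / np.exp(opt_res.x[0])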
import random
import unittest
import traceback

import numpy as np

try:
    from scipy.optimize.linesearch import LineSearchWarning
except ImportError:  # the linesearch module was made private in newer SciPy releases
    from scipy.optimize._linesearch import LineSearchWarning

try:
    import SparseSC
    from SparseSC.fit import fit
    from SparseSC.fit_fast import fit_fast
except ImportError:
    raise RuntimeError(
        "SparseSC is not installed. Use 'pip install -e .' or 'conda develop .' from repo root to install in dev mode"
    )

# import warnings
# warnings.simplefilter("error")

SparseSC.keras_reproducible()  # for when I start testing for correctness

# pylint: disable=missing-docstring
class TestFitForErrors(unittest.TestCase):
    def setUp(self):
        random.seed(12345)
        np.random.seed(101101001)
        control_units = 50
        treated_units = 20
        features = 10
        targets = 5
        self.X = np.random.rand(control_units + treated_units, features)
#     'cigsale_88': smoking_df.xs(1988, level='year')["cigsale"]})
# X_orig = pd.concat((X_avgs, X_spot), axis=1)
# X_orig.isnull().sum().sum()  # 0
X_full = pd.concat((X_avgs, beer_pre.unstack('year'), Xother_pre.unstack('year')), axis=1)
X_full_names = [c[0] + "(" + str(c[1]) + ")" if len(c) == 2 else c for c in X_full.columns]
X_full.isnull().sum().sum()  # 0
X_full = X_full.values

X_Y_pre = np.concatenate((X_full, Y_pre), axis=1)
X_Y_pre_names = X_full_names + Y_pre_names
X_Y_pre_names_arr = np.array(X_Y_pre_names)

# Fast ----------------------#
fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
# print(len(np.diag(fast_fit.V)))
# print(np.diag(fast_fit.V))
# Y_post_sc = fast_fit.predict(Y_post)
# Y_pre_sc = fast_fit.predict(Y_pre)
# post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
# pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
# print(pre_mse)   # 192.210632448
# print(post_mse)  # 129.190437803
# print(X_Y_pre_names_arr[fast_fit.match_space_desc > 0])

# Full ----------------------#
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
print(np.diag(full_fit.V))
print(np.diag(full_fit.V)[np.diag(full_fit.V) > 0])
# in the leave-one-out scenario, the pre-treatment outcomes will be part of the covariates
X_and_Y_pre = np.hstack((X, Y_pre))
X_and_Y_pre_control = np.hstack((X_control, Y_pre_control))

# IDENTIFIERS FOR TREAT AND CONTROL UNITS
# control_units = np.arange(N0 * groups)
# treated_units = np.arange(N1 * groups) + N0

# ------------------------------------------------------------
# ------------------------------------------------------------
# find default penalties
# ------------------------------------------------------------
# ------------------------------------------------------------

# get a starting point for the L2 penalty
w_pen_start_ct = SC.w_pen_guestimate(X_control)
w_pen_start_loo = SC.w_pen_guestimate(X_and_Y_pre_control)

# get the maximum value for the L1 penalty parameter conditional on the guesstimate for the L2 penalty
L1_max_ct = SC.get_max_v_pen(X_control, Y_pre_control, X_treat=X_treated, Y_treat=Y_pre_treated)
if False:
    L1_max_loo = SC.get_max_v_pen(X_and_Y_pre_control[np.arange(100)], Y_post[np.arange(100)])
    print("Max L1 loo %s " % L1_max_loo)
else:
    L1_max_loo = float(147975295.9121998)  # np.float was removed in NumPy 1.24

if False:
    # Demonstrate relations between the L1 and L2 penalties:
    # get the maximum value for the L1 penalty parameter conditional on several L2 penalty parameter values
    L2_grid = 2.0 ** np.arange(-1, 2)
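# A hedged sketch of scoring one candidate (v_pen, w_pen) pair directly; the
# keyword usage mirrors the CV_score call in L1_L2_const_obj_func above, and
# halving L1_max_ct is an arbitrary illustrative choice:
#
#     score = SC.CV_score(X=X_control, Y=Y_pre_control,
#                         X_treat=X_treated, Y_treat=Y_pre_treated,
#                         v_pen=L1_max_ct / 2, w_pen=w_pen_start_ct,
#                         quiet=True)
#
# Smaller scores indicate lower cross-validation error.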