def test_PE_F(self):
    # Test each adaptive-weights (PE) method, plus fixed weights.
    for adaptive_weights in [False, 'ridge', 'decisiontree', 'ridgeCV', 'decisiontreeCV']:
        is_correct = 1
        try:
            model = None
            if adaptive_weights == False:
                df, true_TE = generate_uniform_given_importance(
                    num_control=100, num_treated=100, num_cov=7,
                    min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
                holdout, true_TE = generate_uniform_given_importance(
                    num_control=100, num_treated=100, num_cov=7,
                    min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
                covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
                weight_array = covar_importance / covar_importance.sum()
                model = matching.FLAME(repeats=False, verbose=0,
                                       adaptive_weights=adaptive_weights)
                model.fit(holdout_data=holdout, weight_array=list(weight_array))
                output = model.predict(df)
            else:
                df, true_TE = generate_uniform_given_importance()
                holdout, true_TE = generate_uniform_given_importance()
                model = matching.FLAME(repeats=False, verbose=0,
                                       adaptive_weights=adaptive_weights)
                model.fit(holdout_data=holdout)
                output = model.predict(df)
            if check_statistics(model):
                is_correct = 0
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(1, is_correct,
                         msg='FLAME-Error when we use PE method: {0}'.format(str(adaptive_weights)))
def test_verbose_F(self):
    # Test verbose levels.
    df, true_TE = generate_uniform_given_importance()
    for verbose in [0, 1, 2, 3]:
        is_correct = 1
        try:
            df, true_TE = generate_uniform_given_importance(
                num_control=1000, num_treated=1000, num_cov=7,
                min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
            holdout, true_TE = generate_uniform_given_importance(
                num_control=100, num_treated=100, num_cov=7,
                min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
            covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
            weight_array = covar_importance / covar_importance.sum()

            model = matching.FLAME(missing_data_replace=2, want_bf=True, verbose=verbose)
            model.fit(holdout_data=holdout)
            output = model.predict(df)

            model = matching.FLAME(repeats=True, verbose=verbose)
            model.fit(holdout_data=0.5)
            output = model.predict(df)

            if check_statistics(model):
                is_correct = 0
                break
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(1, is_correct,
                         msg='FLAME-Error when verbose = {0}'.format(verbose))
def test_pre_dame_F(self):
    df, true_TE = generate_uniform_given_importance(
        num_control=500, num_treated=500, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    holdout, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    is_correct = 1
    try:
        covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
        weight_array = covar_importance / covar_importance.sum()
        for x in [False, True]:
            for y in [False, True]:
                model1 = matching.FLAME(repeats=x, want_pe=y, want_bf=y,
                                        verbose=0, adaptive_weights=False)
                model1.fit(holdout_data=holdout, weight_array=list(weight_array))
                output = model1.predict(df, pre_dame=True)

                model2 = matching.FLAME(repeats=x, want_pe=y, want_bf=y,
                                        verbose=0, adaptive_weights='decisiontreeCV')
                model2.fit(holdout_data=holdout)
                output = model2.predict(df, pre_dame=True)
                output = model2.predict(df, pre_dame=True)

                if check_statistics(model1) or check_statistics(model2):
                    is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when we use pre_dame')
def test_miss_data_indicator_F(self):
    is_correct = 1
    try:
        df, true_TE = generate_uniform_given_importance(num_control=1000, num_treated=1000)

        # Create missing values in df, marked with the indicator 'a'.
        m, n = df.shape
        for i in range(int(m / 100)):
            for j in [0, int(n / 2)]:
                df.iloc[i, j] = 'a'
        holdout = df.copy()

        model = matching.FLAME(missing_indicator='a',
                               missing_holdout_replace=1,
                               missing_data_replace=1)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when missing_indicator')
def broken_treatment_column_name_value():
    df, true_TE = generate_uniform_given_importance()
    holdout, true_TE = generate_uniform_given_importance()
    df.loc[0, 'treated'] = 4
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(df)
def test_large_C_repeats_F(self):
    df_path = os.path.join(os.path.dirname(__file__), 'basicTestData.csv')
    df = pd.read_csv(df_path)
    holdout_path = os.path.join(os.path.dirname(__file__), 'basicHoldoutData.csv')
    holdout = pd.read_csv(holdout_path)

    model = matching.FLAME(repeats=False, verbose=1)
    model.fit(holdout_data=holdout)
    algo_output = model.predict(df, C=100000)

    result_path = os.path.join(os.path.dirname(__file__), 'basicResultData.csv')
    result = pd.read_csv(result_path, index_col="Unnamed: 0")

    dfs_equal = 1
    try:
        for index in result.index:
            for col in result.columns:
                if (result.loc[index, col] != "*" and
                        algo_output.loc[index, col] != "*" and
                        int(result.loc[index, col]) != int(algo_output.loc[index, col])):
                    print("index, col", index, col)
                    dfs_equal = 0
                    break
    except (KeyError, ValueError):
        # We hit this block if the data frames do not share the same
        # index/columns, or if an entry cannot be converted to int
        # (e.g. an unexpected string).
        dfs_equal = 0
    self.assertEqual(1, dfs_equal,
                     msg='Data frames not equal on index {0}, col {1}'.format(index, col))
def broken_holdout_type():
    df, true_TE = generate_uniform_given_importance(num_control=100, num_treated=100)
    holdout = df.copy()
    holdout.iloc[0, 0] = 's'
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(df)
def test_miss_data_F(self):
    is_correct = 1
    try:
        for missing_holdout_replace in [0, 1, 2]:
            for missing_data_replace in [0, 1, 2, 3]:
                df, true_TE = generate_uniform_given_importance(
                    num_control=1000, num_treated=1000)

                # Create missing values in df.
                m, n = df.shape
                for i in range(int(m / 100)):
                    for j in [0, int(n / 2)]:
                        df.iloc[i, j] = np.nan
                holdout = df.copy()

                model = matching.FLAME(
                    missing_holdout_replace=missing_holdout_replace,
                    missing_data_replace=missing_data_replace)
                model.fit(holdout_data=holdout)
                output = model.predict(df)
                if check_statistics(model):
                    is_correct = 0
                    break
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct,
                     msg='FLAME-Error when doing missing data handling with '
                         'missing_holdout_replace = {0}, missing_data_replace = {1}'.format(
                             missing_holdout_replace, missing_data_replace))
def test_datasets_F(self):
    df_path = os.path.join(os.path.dirname(__file__), 'basicTestData.csv')
    for gen in [generate_uniform_given_importance,
                generate_binomial_given_importance,
                generate_binomial_decay_importance,
                df_path]:
        is_correct = 1
        try:
            df = None
            holdout = None
            if not isinstance(gen, str):
                df, true_TE = gen()
                holdout, true_TE = gen()
            else:
                df = gen
                holdout = gen
            model = matching.FLAME(repeats=False)
            model.fit(holdout_data=holdout)
            output = model.predict(df)
            if check_statistics(model):
                is_correct = 0
                break
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(1, is_correct,
                         msg='FLAME-Error when we use the dataset generated by {0}'.format(str(gen)))
def broken_C():
    df, true_TE = generate_uniform_given_importance()
    holdout, true_TE = generate_uniform_given_importance()
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(df, C=-1)
def test_repeats_F(self):
    # Test the repeats parameter.
    df, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    holdout, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    is_correct = 1
    try:
        model = matching.FLAME(repeats=True)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when repeats = True')
def broken_false_dataset():
    df, true_TE = generate_uniform_given_importance(num_control=1000, num_treated=1000)
    holdout, true_TE = generate_uniform_given_importance(num_control=1000, num_treated=1000)
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(False)
def broken_data_len():
    df, true_TE = generate_uniform_given_importance(
        num_control=1000, num_treated=1000, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    holdout, true_TE = generate_uniform_given_importance()
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(df)
def test_other_param_F(self):
    is_correct = 1
    try:
        df, true_TE = generate_uniform_given_importance(
            num_control=1000, num_treated=1000, num_cov=7,
            min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
        holdout, true_TE = generate_uniform_given_importance(
            num_control=100, num_treated=100, num_cov=7,
            min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])

        model = matching.FLAME(early_stop_pe=1, verbose=0)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0

        # model = matching.FLAME(stop_unmatched_c=True, verbose=0)
        # model.fit(holdout_data=holdout)
        # output = model.predict(df)
        # if check_statistics(model):
        #     is_correct = 0

        # model = matching.FLAME(stop_unmatched_t=True, verbose=0)
        # model.fit(holdout_data=holdout)
        # output = model.predict(df)
        # if check_statistics(model):
        #     is_correct = 0

        model = matching.FLAME(early_stop_un_c_frac=0.5, verbose=0)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0

        model = matching.FLAME(early_stop_un_t_frac=0.5, verbose=0)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0

        model = matching.FLAME(early_stop_iterations=2, verbose=0)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when testing other parameters')
def broken_column_match():
    df, true_TE = generate_uniform_given_importance()
    holdout, true_TE = generate_uniform_given_importance()
    cols = list(holdout.columns)
    cols[0] = 'dasfadf'
    holdout.columns = cols
    model = matching.FLAME()
    model.fit(holdout_data=holdout)
    output = model.predict(df)
def broken_missing_data_replace():
    df, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    holdout, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
    weight_array = covar_importance / covar_importance.sum()
    model = matching.FLAME(missing_data_replace=2, adaptive_weights=False)
    model.fit(holdout_data=holdout, weight_array=list(weight_array))
    output = model.predict(df)
def test_no_matching_F(self):
    # Test behavior when no units can be matched.
    df, true_TE = generate_uniform_given_importance(num_control=3000, num_treated=3000)
    is_correct = 1
    try:
        df = pd.DataFrame([[1, 2, 0, 1.0], [3, 4, 1, 2.0], [5, 6, 0, 5.0],
                           [7, 8, 1, 8.0], [9, 10, 1, 10.0]])
        df.columns = ['cov1', 'cov2', 'treated', 'outcome']
        holdout = df.copy()
        model = matching.FLAME(repeats=True)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when no matching')
def test_data_split_F(self):
    # Test data split: holdout given as a fraction.
    df, true_TE = generate_uniform_given_importance(num_control=3000, num_treated=3000)
    is_correct = 1
    try:
        for holdout in [0.3, 0.5, 0.7]:
            model = matching.FLAME(repeats=True)
            model.fit(holdout_data=holdout)
            output = model.predict(df)
            if check_statistics(model):
                is_correct = 0
                break
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when holdout = {0}'.format(holdout))
def test_has_unmatched_units_F(self):
    # Test a dataset that leaves some units unmatched.
    is_correct = 1
    try:
        df = pd.DataFrame([[1, 2, 0, 1.0], [3, 4, 0, 2.0], [5, 6, 0, 5.0],
                           [7, 8, 0, 8.0], [9, 10, 1, 10.0], [9, 20, 0, 10.0]])
        df.columns = ['cov1', 'cov2', 'treated', 'outcome']
        holdout = df.copy()
        model = matching.FLAME(repeats=True)
        model.fit(holdout_data=holdout)
        output = model.predict(df)
        if check_statistics(model):
            is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='FLAME-Error when there are unmatched units')
def test_want_pebf_F(self):
    # Test the want_pe and want_bf options.
    df, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    holdout, true_TE = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=7,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
    is_correct = 1
    try:
        for want_pe in [False, True]:
            for want_bf in [False, True]:
                model = matching.FLAME(want_pe=want_pe, want_bf=want_bf)
                model.fit(holdout_data=holdout)
                output = model.predict(df)
                if (check_statistics(model) or
                        (want_pe and len(model.pe_each_iter) == 0) or
                        (want_bf and len(model.bf_each_iter) == 0)):
                    is_correct = 0
                    break
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct,
                     msg='FLAME-Error when want_pe = {0} want_bf = {1}'.format(str(want_pe), str(want_bf)))
def broken_early_stop_pe_frac():
    df, true_TE = generate_uniform_given_importance(num_control=100, num_treated=100)
    model = matching.FLAME(early_stop_pe_frac=123)
    model.fit(holdout_data=df)
    output = model.predict(df)
def broken_outcome_column_name():
    df, true_TE = generate_uniform_given_importance()
    holdout, true_TE = generate_uniform_given_importance()
    model = matching.FLAME()
    model.fit(holdout_data=holdout, outcome_column_name="sadfdag")
    output = model.predict(df)
def broken_ATE_input_model():
    model = matching.FLAME()
    ATE(model)
def broken_weight_array_sum():
    df, true_TE = generate_uniform_given_importance(num_control=100, num_treated=100)
    model = matching.FLAME(adaptive_weights=False)
    model.fit(holdout_data=df, weight_array=[1, 1, 1, 1])
    output = model.predict(df)
def broken_adaptive_weights():
    df, true_TE = generate_uniform_given_importance(num_control=100, num_treated=100)
    model = matching.FLAME(adaptive_weights='safdsaf')
    model.fit(holdout_data=df)
    output = model.predict(df)
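# The broken_* helpers above each feed FLAME an invalid input and are expected
# to raise. A minimal sketch of one way to exercise them together is below; the
# helper name run_broken_input_checks and the decision to treat any Exception
# as the expected failure are assumptions for illustration, not part of the
# original test harness.
def run_broken_input_checks():
    broken_funcs = [
        broken_treatment_column_name_value, broken_holdout_type, broken_C,
        broken_false_dataset, broken_data_len, broken_column_match,
        broken_missing_data_replace, broken_early_stop_pe_frac,
        broken_outcome_column_name, broken_ATE_input_model,
        broken_weight_array_sum, broken_adaptive_weights,
    ]
    for broken_func in broken_funcs:
        try:
            broken_func()
        except Exception:
            # Raising is the expected behavior for an invalid input.
            continue
        raise AssertionError('{0} did not raise'.format(broken_func.__name__))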