Ejemplo n.º 1
0
    def test_PE_F(self):
        """Exercise FLAME once for each ``adaptive_weights`` setting.

        With ``adaptive_weights=False`` the model is fitted on 7-covariate
        data with an explicit ``weight_array`` (the covariate importances
        scaled to sum to 1). For every named learner the generator defaults
        are used and no weights are passed. An iteration fails when
        ``check_statistics(model)`` is truthy or when a ``KeyError`` or
        ``ValueError`` is raised.
        """
        pe_methods = (False, 'ridge', 'decisiontree', 'ridgeCV', 'decisiontreeCV')
        for adaptive_weights in pe_methods:
            passed = 1
            try:
                fit_options = {}
                if adaptive_weights is False:
                    shape = dict(num_control=100, num_treated=100, num_cov=7,
                                 min_val=0, max_val=3)
                    data, _ = generate_uniform_given_importance(
                        covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
                    holdout, _ = generate_uniform_given_importance(
                        covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
                    importance = np.array([4, 3, 2, 1, 0, 0, 0])
                    fit_options['weight_array'] = list(importance / importance.sum())
                else:
                    data, _ = generate_uniform_given_importance()
                    holdout, _ = generate_uniform_given_importance()

                flame = matching.FLAME(repeats=False, verbose=0,
                                       adaptive_weights=adaptive_weights)
                flame.fit(holdout_data=holdout, **fit_options)
                flame.predict(data)

                if check_statistics(flame):
                    passed = 0
            except (KeyError, ValueError):
                passed = 0

            self.assertEqual(
                1, passed,
                msg='FLAME-Error when we use PE method: {0} '.format(str(adaptive_weights)))
Ejemplo n.º 2
0
    def test_verbose_F(self):
        """Run FLAME at every ``verbose`` level (0-3) and check its statistics.

        For each level a first model (``missing_data_replace=2``,
        ``want_bf=True``) is fitted on a separate holdout set, then a second
        model (``repeats=True``) is fitted with ``holdout_data=0.5``. Only the
        second model is passed to ``check_statistics``; a truthy result, a
        ``KeyError`` or a ``ValueError`` fails that level.
        """
        for verbose in [0, 1, 2, 3]:
            is_correct = 1
            try:
                df, _ = generate_uniform_given_importance(
                    num_control=1000, num_treated=1000, num_cov=7,
                    min_val=0, max_val=3,
                    covar_importance=[4, 3, 2, 1, 0, 0, 0])
                holdout, _ = generate_uniform_given_importance(
                    num_control=100, num_treated=100, num_cov=7,
                    min_val=0, max_val=3,
                    covar_importance=[4, 3, 2, 1, 0, 0, 0])

                model = matching.FLAME(missing_data_replace=2, want_bf=True,
                                       verbose=verbose)
                model.fit(holdout_data=holdout)
                model.predict(df)

                model = matching.FLAME(repeats=True, verbose=verbose)
                model.fit(holdout_data=0.5)
                model.predict(df)

                if check_statistics(model):
                    # Deliberately no `break`: leaving the loop here would
                    # skip the assertion below and hide the failure.
                    is_correct = 0
            except (KeyError, ValueError):
                is_correct = 0

            self.assertEqual(1, is_correct,
                             msg='FLAME-Error when verbose ={0}'.format(verbose))
Ejemplo n.º 3
0
    def test_pre_dame_F(self):
        """Run ``predict(..., pre_dame=True)`` for every flag combination.

        ``repeats`` and a shared ``want_pe``/``want_bf`` flag each take both
        boolean values. Per combination, one model uses fixed weights
        (``adaptive_weights=False`` plus a normalised ``weight_array``) and
        another uses ``adaptive_weights='decisiontreeCV'``. The test fails
        when ``check_statistics`` is truthy for either model or when a
        ``KeyError`` or ``ValueError`` is raised.
        """
        shape = dict(num_cov=7, min_val=0, max_val=3)
        data, _ = generate_uniform_given_importance(
            num_control=500, num_treated=500,
            covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
        holdout, _ = generate_uniform_given_importance(
            num_control=100, num_treated=100,
            covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)

        passed = 1
        try:
            importance = np.array([4, 3, 2, 1, 0, 0, 0])
            weights = importance / importance.sum()
            flag_pairs = [(rep, extra)
                          for rep in (False, True)
                          for extra in (False, True)]
            for rep, extra in flag_pairs:
                fixed = matching.FLAME(repeats=rep, want_pe=extra, want_bf=extra,
                                       verbose=0, adaptive_weights=False)
                fixed.fit(holdout_data=holdout, weight_array=list(weights))
                fixed.predict(data, pre_dame=True)

                learned = matching.FLAME(repeats=rep, want_pe=extra, want_bf=extra,
                                         verbose=0, adaptive_weights='decisiontreeCV')
                learned.fit(holdout_data=holdout)
                # predict is invoked twice on this model, as this test has
                # always done. NOTE(review): possibly an accidental duplicate.
                learned.predict(data, pre_dame=True)
                learned.predict(data, pre_dame=True)

                if check_statistics(fixed) or check_statistics(learned):
                    passed = 0
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed, msg='FLAME-Error when we use pre_dame')
    def test_miss_data_indicator_F(self):
        """Check FLAME with a custom missing-value marker (``'a'``).

        The marker is written into the first and the middle column of the
        first ``rows / 100`` rows. The same frame (copied) serves as holdout
        data. The test fails when ``check_statistics(model)`` is truthy or
        when a ``KeyError`` or ``ValueError`` is raised.
        """
        passed = 1
        try:
            data, _ = generate_uniform_given_importance(num_control=1000,
                                                        num_treated=1000)
            n_rows, n_cols = data.shape
            marked_columns = (0, int(n_cols / 2))
            for row in range(int(n_rows / 100)):
                for column in marked_columns:
                    data.iloc[row, column] = 'a'
            holdout = data.copy()

            flame = matching.FLAME(missing_indicator='a',
                                   missing_holdout_replace=1,
                                   missing_data_replace=1)
            flame.fit(holdout_data=holdout)
            flame.predict(data)
            if check_statistics(flame):
                passed = 0
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed, msg='FLAME-Error when missing_indicator')
Ejemplo n.º 5
0
 def broken_treatment_column_name_value():
     """Fit and predict with the value 4 in the 'treated' column of row 0.

     NOTE(review): presumably a caller outside this view expects this to
     raise (e.g. via assertRaises) -- confirm.
     """
     data, _ = generate_uniform_given_importance()
     holdout, _ = generate_uniform_given_importance()
     data.loc[0, 'treated'] = 4
     flame = matching.FLAME()
     flame.fit(holdout_data=holdout)
     flame.predict(data)
Ejemplo n.º 6
0
 def test_large_C_repeats_F(self):
     """Compare FLAME output (``C=100000``, no repeats) with a stored result.

     The input, holdout and expected-result CSV files are read from this
     file's directory. Cells equal to ``"*"`` on either side are skipped;
     all other cells must agree after conversion to ``int``. A ``KeyError``
     or ``ValueError`` during the comparison also counts as a mismatch.
     """
     base_dir = os.path.dirname(__file__)
     df = pd.read_csv(os.path.join(base_dir, 'basicTestData.csv'))
     holdout = pd.read_csv(os.path.join(base_dir, 'basicHoldoutData.csv'))

     model = matching.FLAME(repeats=False, verbose=1)
     model.fit(holdout_data=holdout)
     algo_output = model.predict(df, C=100000)

     result = pd.read_csv(os.path.join(base_dir, 'basicResultData.csv'),
                          index_col="Unnamed: 0")

     # Pre-bind the loop variables so the failure message can always be
     # formatted, even when `result` has no rows.
     index = col = None
     dfs_equal = 1
     try:
         for index in result.index:
             for col in result.columns:
                 if (result.loc[index, col] != "*" and
                         algo_output.loc[index, col] != "*" and
                         int(result.loc[index, col]) != int(algo_output.loc[index, col])):
                     dfs_equal = 0
                     break
             if not dfs_equal:
                 # Stop at the first mismatch so `index` and `col` still
                 # name the offending cell when the message is built.
                 break
     except (KeyError, ValueError):
         # Reached when the frames' labels differ or a cell cannot be
         # converted to int (e.g. an unexpected string entry).
         dfs_equal = 0

     self.assertEqual(1, dfs_equal,
                      msg='Data frames not equal on index {0}, col {1}'.format(index, col))
Ejemplo n.º 7
0
 def broken_holdout_type():
     """Fit FLAME on holdout data whose first cell holds the string 's'.

     NOTE(review): presumably a caller outside this view expects this to
     raise (e.g. via assertRaises) -- confirm.
     """
     data, _ = generate_uniform_given_importance(num_control=100, num_treated=100)
     bad_holdout = data.copy()
     bad_holdout.iloc[0, 0] = 's'
     flame = matching.FLAME()
     flame.fit(holdout_data=bad_holdout)
     flame.predict(data)
    def test_miss_data_F(self):
        """Check every ``missing_holdout_replace`` / ``missing_data_replace`` pair.

        For each pair, NaN is written into the first and the middle column of
        the first ``rows // 100`` rows of freshly generated data, and a copy
        of that frame is used as holdout data. The test stops at the first
        pair for which ``check_statistics(model)`` is truthy or a
        ``KeyError``/``ValueError`` is raised, and reports that pair.
        """
        # A single flat loop, so `break` leaves the whole search and the loop
        # variables still hold the failing pair when the message is built.
        combinations = [(holdout_mode, data_mode)
                        for holdout_mode in [0, 1, 2]
                        for data_mode in [0, 1, 2, 3]]
        missing_holdout_replace = missing_data_replace = None
        is_correct = 1
        try:
            for missing_holdout_replace, missing_data_replace in combinations:
                df, _ = generate_uniform_given_importance(
                    num_control=1000, num_treated=1000)
                # Create missing values in df.
                m, n = df.shape
                for i in range(m // 100):
                    for j in [0, n // 2]:
                        df.iloc[i, j] = np.nan
                holdout = df.copy()

                model = matching.FLAME(
                    missing_holdout_replace=missing_holdout_replace,
                    missing_data_replace=missing_data_replace)
                model.fit(holdout_data=holdout)
                model.predict(df)
                if check_statistics(model):
                    is_correct = 0
                    break
        except (KeyError, ValueError):
            is_correct = 0

        self.assertEqual(
            1, is_correct,
            msg='FLAME-Error when do missing data handling with '
                'missing_holdout_replace = {0}, missing_data_replace = {1}'.format(
                    missing_holdout_replace, missing_data_replace))
Ejemplo n.º 9
0
    def test_datasets_F(self):
        """Run FLAME (``repeats=False``) on several data sources.

        The sources are three generator functions and the path to
        ``basicTestData.csv`` next to this file. A generator is called twice,
        once for the data and once for the holdout set; the path string is
        passed unchanged as both. A source fails when
        ``check_statistics(model)`` is truthy or when a ``KeyError`` or
        ``ValueError`` is raised.
        """
        df_path = os.path.join(os.path.dirname(__file__), 'basicTestData.csv')
        sources = [generate_uniform_given_importance,
                   generate_binomial_given_importance,
                   generate_binomial_decay_importance,
                   df_path]

        for gen in sources:
            is_correct = 1
            try:
                if isinstance(gen, str):
                    df = holdout = gen
                else:
                    df, _ = gen()
                    holdout, _ = gen()
                model = matching.FLAME(repeats=False)
                model.fit(holdout_data=holdout)
                model.predict(df)

                if check_statistics(model):
                    # Deliberately no `break`: leaving the loop here would
                    # skip the assertion below and hide the failure.
                    is_correct = 0
            except (KeyError, ValueError):
                is_correct = 0

            self.assertEqual(
                1, is_correct,
                msg='FLAME-Error when we use the dataset generated by {0} '.format(str(gen)))
Ejemplo n.º 10
0
        def broken_C():
            """Call ``predict`` with ``C=-1`` after a normal fit.

            NOTE(review): presumably the enclosing test (not visible here)
            expects this to raise -- confirm.
            """
            data, _ = generate_uniform_given_importance()
            holdout, _ = generate_uniform_given_importance()

            flame = matching.FLAME()
            flame.fit(holdout_data=holdout)
            flame.predict(data, C=-1)
    def test_repeats_F(self):
        """Check FLAME with ``repeats=True`` on 7-covariate uniform data.

        The test fails when ``check_statistics(model)`` is truthy or when a
        ``KeyError`` or ``ValueError`` is raised during fit or predict.
        """
        shape = dict(num_control=100, num_treated=100, num_cov=7,
                     min_val=0, max_val=3)
        data, _ = generate_uniform_given_importance(
            covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
        holdout, _ = generate_uniform_given_importance(
            covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)

        passed = 1
        try:
            flame = matching.FLAME(repeats=True)
            flame.fit(holdout_data=holdout)
            flame.predict(data)
            if check_statistics(flame):
                passed = 0
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed, msg='FLAME-Error when repeat = True')
 def broken_false_dataset():
     """Call ``predict(False)`` instead of passing a dataset.

     NOTE(review): presumably a caller outside this view expects this to
     raise (e.g. via assertRaises) -- confirm.
     """
     size = dict(num_control=1000, num_treated=1000)
     # The first dataset is generated but never passed to the model.
     _unused, _ = generate_uniform_given_importance(**size)
     holdout, _ = generate_uniform_given_importance(**size)
     flame = matching.FLAME()
     flame.fit(holdout_data=holdout)
     flame.predict(False)
Ejemplo n.º 13
0
 def broken_data_len():
     """Predict on 7-covariate data after fitting on default-generated holdout data.

     NOTE(review): the name suggests the two frames differ in number of
     columns and that a caller expects a raise; the generator defaults are
     not visible here -- confirm.
     """
     data, _ = generate_uniform_given_importance(
         num_control=1000, num_treated=1000, num_cov=7,
         min_val=0, max_val=3,
         covar_importance=[4, 3, 2, 1, 0, 0, 0])
     holdout, _ = generate_uniform_given_importance()
     flame = matching.FLAME()
     flame.fit(holdout_data=holdout)
     flame.predict(data)
Ejemplo n.º 14
0
    def test_other_param_F(self):
        """Run FLAME once per early-stopping option and check its statistics.

        Each option is passed on its own together with ``verbose=0``. The
        test fails when ``check_statistics(model)`` is truthy for any option
        or when a ``KeyError`` or ``ValueError`` is raised.
        """
        # stop_unmatched_c=True and stop_unmatched_t=True were disabled
        # (commented out) in this test and are still not exercised.
        stop_options = (
            {'early_stop_pe': 1},
            {'early_stop_un_c_frac': 0.5},
            {'early_stop_un_t_frac': 0.5},
            {'early_stop_iterations': 2},
        )
        passed = 1
        try:
            data, _ = generate_uniform_given_importance(
                num_control=1000, num_treated=1000, num_cov=7,
                min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])
            holdout, _ = generate_uniform_given_importance(
                num_control=100, num_treated=100, num_cov=7,
                min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])

            for option in stop_options:
                flame = matching.FLAME(verbose=0, **option)
                flame.fit(holdout_data=holdout)
                flame.predict(data)
                if check_statistics(flame):
                    passed = 0
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed, msg='FLAME-Error when other parameters')
Ejemplo n.º 15
0
 def broken_column_match():
     """Fit on holdout data whose first column is renamed to 'dasfadf'.

     NOTE(review): presumably a caller outside this view expects the
     column mismatch with the input data to raise -- confirm.
     """
     data, _ = generate_uniform_given_importance()
     holdout, _ = generate_uniform_given_importance()
     renamed = list(holdout.columns)
     renamed[0] = 'dasfadf'
     holdout.columns = renamed
     flame = matching.FLAME()
     flame.fit(holdout_data=holdout)
     flame.predict(data)
Ejemplo n.º 16
0
 def broken_missing_data_replace():
     """Combine ``missing_data_replace=2`` with fixed weights.

     The model is built with ``adaptive_weights=False`` and fitted with a
     normalised ``weight_array``.

     NOTE(review): presumably a caller outside this view expects this
     combination to raise -- confirm.
     """
     shape = dict(num_control=100, num_treated=100, num_cov=7,
                  min_val=0, max_val=3)
     data, _ = generate_uniform_given_importance(
         covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
     holdout, _ = generate_uniform_given_importance(
         covar_importance=[4, 3, 2, 1, 0, 0, 0], **shape)
     importance = np.array([4, 3, 2, 1, 0, 0, 0])
     weights = importance / importance.sum()
     flame = matching.FLAME(missing_data_replace=2, adaptive_weights=False)
     flame.fit(holdout_data=holdout, weight_array=list(weights))
     flame.predict(data)
Ejemplo n.º 17
0
    def test_no_matching_F(self):
        """Check that FLAME runs on a tiny frame in which no two rows share a covariate value.

        The five-row frame is also used (copied) as holdout data. The test
        only requires that fit and predict raise neither ``KeyError`` nor
        ``ValueError``; the model statistics are not inspected.
        """
        is_correct = 1
        try:
            df = pd.DataFrame(
                [[1, 2, 0, 1.0],
                 [3, 4, 1, 2.0],
                 [5, 6, 0, 5.0],
                 [7, 8, 1, 8.0],
                 [9, 10, 1, 10.0]],
                columns=['cov1', 'cov2', 'treated', 'outcome'])
            holdout = df.copy()
            model = matching.FLAME(repeats=True)
            model.fit(holdout_data=holdout)
            model.predict(df)
        except (KeyError, ValueError):
            is_correct = 0

        self.assertEqual(1, is_correct, msg='FLAME-Error when no matching')
Ejemplo n.º 18
0
    def test_data_split_F(self):
        """Check FLAME when ``holdout_data`` is given as a fraction.

        The fractions 0.3, 0.5 and 0.7 are tried in turn on the same
        generated data. The loop stops at the first fraction for which
        ``check_statistics(model)`` is truthy; a ``KeyError`` or
        ``ValueError`` also fails the test.
        """
        data, _ = generate_uniform_given_importance(num_control=3000,
                                                    num_treated=3000)

        passed = 1
        try:
            for fraction in (0.3, 0.5, 0.7):
                flame = matching.FLAME(repeats=True)
                flame.fit(holdout_data=fraction)
                flame.predict(data)
                if check_statistics(flame):
                    passed = 0
                    break
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed,
                         msg='FLAME-Error when holdout = {0}'.format(fraction))
Ejemplo n.º 19
0
    def test_has_unmatched_units_F(self):
        """Check FLAME on a six-row frame with a single treated unit.

        The frame is also used (copied) as holdout data. The test fails when
        ``check_statistics(model)`` is truthy or when a ``KeyError`` or
        ``ValueError`` is raised.
        """
        passed = 1
        try:
            rows = [
                [1, 2, 0, 1.0],
                [3, 4, 0, 2.0],
                [5, 6, 0, 5.0],
                [7, 8, 0, 8.0],
                [9, 10, 1, 10.0],
                [9, 20, 0, 10.0],
            ]
            data = pd.DataFrame(rows)
            data.columns = ['cov1', 'cov2', 'treated', 'outcome']
            flame = matching.FLAME(repeats=True)
            flame.fit(holdout_data=data.copy())
            flame.predict(data)

            if check_statistics(flame):
                passed = 0
        except (KeyError, ValueError):
            passed = 0

        self.assertEqual(1, passed, msg='FLAME-Error when no matching')
Ejemplo n.º 20
0
    def test_want_pebf_F(self):
        """Check every ``want_pe`` / ``want_bf`` combination.

        A combination fails when ``check_statistics(model)`` is truthy, when
        ``want_pe`` is set but ``model.pe_each_iter`` is empty, or when
        ``want_bf`` is set but ``model.bf_each_iter`` is empty. A ``KeyError``
        or ``ValueError`` also fails the test. The search stops at the first
        failure so the message names the failing combination.
        """
        df, _ = generate_uniform_given_importance(
            num_control=100, num_treated=100, num_cov=7,
            min_val=0, max_val=3,
            covar_importance=[4, 3, 2, 1, 0, 0, 0])
        holdout, _ = generate_uniform_given_importance(
            num_control=100, num_treated=100, num_cov=7,
            min_val=0, max_val=3,
            covar_importance=[4, 3, 2, 1, 0, 0, 0])

        # A single flat loop, so `break` leaves the whole search and the loop
        # variables still hold the failing pair when the message is built.
        combinations = [(pe, bf) for pe in [False, True] for bf in [False, True]]
        want_pe = want_bf = None
        is_correct = 1
        try:
            for want_pe, want_bf in combinations:
                model = matching.FLAME(want_pe=want_pe, want_bf=want_bf)
                model.fit(holdout_data=holdout)
                model.predict(df)
                if (check_statistics(model)
                        or (want_pe and len(model.pe_each_iter) == 0)
                        or (want_bf and len(model.bf_each_iter) == 0)):
                    is_correct = 0
                    break
        except (KeyError, ValueError):
            # This handler used to assign a misspelled flag (`is_wrong`),
            # which let exceptions pass the test.
            is_correct = 0

        self.assertEqual(
            1, is_correct,
            msg='FLAME Error when want_pe = {0} want_bf = {1}'.format(str(want_pe), str(want_bf)))
Ejemplo n.º 21
0
 def broken_early_stop_pe_frac():
     """Build FLAME with ``early_stop_pe_frac=123`` and run fit and predict on one frame.

     NOTE(review): presumably a caller outside this view expects this
     value to be rejected with an exception -- confirm.
     """
     data, _ = generate_uniform_given_importance(num_control=100, num_treated=100)
     flame = matching.FLAME(early_stop_pe_frac=123)
     flame.fit(holdout_data=data)
     flame.predict(data)
Ejemplo n.º 22
0
 def broken_outcome_column_name():
     """Fit FLAME with ``outcome_column_name="sadfdag"``.

     NOTE(review): presumably that column does not exist in the generated
     data and a caller outside this view expects a raise -- confirm.
     """
     data, _ = generate_uniform_given_importance()
     holdout, _ = generate_uniform_given_importance()
     flame = matching.FLAME()
     flame.fit(holdout_data=holdout, outcome_column_name="sadfdag")
     flame.predict(data)
Ejemplo n.º 23
0
 def broken_ATE_input_model():
     """Call ``ATE`` on a FLAME model that was never fitted or used to predict.

     NOTE(review): presumably a caller outside this view expects this to
     raise -- confirm.
     """
     ATE(matching.FLAME())
Ejemplo n.º 24
0
 def broken_weight_array_sum():
     """Fit FLAME (fixed weights) with ``weight_array=[1, 1, 1, 1]``.

     NOTE(review): the name suggests the weights are rejected because they
     do not sum to 1 and that a caller expects a raise -- confirm.
     """
     data, _ = generate_uniform_given_importance(num_control=100, num_treated=100)
     flame = matching.FLAME(adaptive_weights=False)
     flame.fit(holdout_data=data, weight_array=[1, 1, 1, 1])
     flame.predict(data)
Ejemplo n.º 25
0
 def broken_adaptive_weights():
     """Build FLAME with ``adaptive_weights='safdsaf'`` and run fit and predict.

     NOTE(review): presumably a caller outside this view expects this
     unrecognised value to raise -- confirm.
     """
     data, _ = generate_uniform_given_importance(num_control=100, num_treated=100)
     flame = matching.FLAME(adaptive_weights='safdsaf')
     flame.fit(holdout_data=data)
     flame.predict(data)