def test_dist_table_sum_row(cps_subsample):
    """
    Check that the last row of a distribution table is the same whether
    the rows are grouped by 'standard_income_bins' or by 'soi_agi_bins'.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=records)
    calc.calc_all()
    last_rows = []
    for bins in ('standard_income_bins', 'soi_agi_bins'):
        # a fresh dataframe is requested from the calculator for each table
        table = create_distribution_table(
            calc.distribution_table_dataframe(), bins, 'expanded_income')
        last_rows.append(table[-1:])
    assert np.allclose(last_rows[0], last_rows[1])
def test_dist_table_sum_row(cps_subsample):
    """
    Build two expanded-income distribution tables with different income
    bins and require their final rows to be numerically close.
    """
    cps_records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=cps_records)
    calculator.calc_all()

    def final_row(groupby):
        """Return the last row of the table built with the given bins."""
        frame = calculator.distribution_table_dataframe()
        table = create_distribution_table(frame, groupby, 'expanded_income')
        return table[-1:]

    standard_row = final_row('standard_income_bins')
    soi_row = final_row('soi_agi_bins')
    assert np.allclose(standard_row, soi_row)
def test_dist_table_sum_row(pit_subsample, cit_crosssample):
    """
    Compare one row of the 'standard_income_bins' table with one row of
    the 'weighted_deciles' table, both built on the 'GTI' income measure.
    """
    pit_records = Records(data=pit_subsample)
    cit_records = CorpRecords(data=cit_crosssample)
    calc = Calculator(policy=Policy(), records=pit_records,
                      corprecords=cit_records)
    calc.calc_all()
    bins_table = create_distribution_table(
        calc.distribution_table_dataframe(), 'standard_income_bins', 'GTI')
    decile_table = create_distribution_table(
        calc.distribution_table_dataframe(), 'weighted_deciles', 'GTI')
    # The bins table is compared on its last row, the decile table on its
    # fourth-from-last row (presumably the rows after it are extra detail
    # rows -- confirm against create_distribution_table).
    bins_row = bins_table[-1:]
    decile_row = decile_table[-4:-3]
    assert np.allclose(bins_row, decile_row)
def distribution_table(self, year, groupby, income_measure, calc):
    """
    Method to create a distribution table

    Parameters
    ----------
    year: which year the distribution table data should be from
    groupby: determines how the rows in the table are sorted
        options: 'weighted_deciles', 'standard_income_bins', 'soi_agi_bins'
    income_measure: determines which variable is used to sort the rows in
        the table
        options: 'expanded_income' or 'expanded_income_baseline'
    calc: which calculator to use: base or reform (matched without regard
        to letter case)

    Returns
    -------
    DataFrame containing a distribution table

    Raises
    ------
    ValueError: if calc is not a string equal to "base" or "reform"

    Notes
    -----
    The helper columns (num_returns_ItemDed, num_returns_StandardDed,
    num_returns_AMT and, when requested, expanded_income_baseline) are
    assigned directly on the data object stored for the selected year, so
    they remain on that object after this method returns.
    """
    # normalize the selector once; anything that is not a string falls
    # through to the ValueError below instead of failing on .lower()
    selector = calc.lower() if isinstance(calc, str) else None
    # pull desired data
    if selector == "base":
        data = self.base_data[year]
    elif selector == "reform":
        data = self.reform_data[year]
    else:
        raise ValueError("calc must be either BASE or REFORM")
    # minor data preparation before calling the function:
    # each column holds the weight s006 where the condition is true, else 0
    weights = data["s006"]
    data["num_returns_ItemDed"] = weights.where(data["c04470"] > 0., 0.)
    data["num_returns_StandardDed"] = weights.where(
        data["standard"] > 0., 0.)
    data["num_returns_AMT"] = weights.where(data["c09600"] > 0., 0.)
    if income_measure == "expanded_income_baseline":
        # the baseline measure always comes from the base data, even when
        # the reform data is being tabulated
        data["expanded_income_baseline"] = (
            self.base_data[year]["expanded_income"])
    return create_distribution_table(data, groupby, income_measure)
def test_dist_table_sum_row(records_2009):
    """
    Check that weighted-sum tables grouped by small and by large income
    bins share the same last row, and that a weighted-average table is
    returned as a DataFrame.
    """
    # current-law calculator
    calc1 = Calculator(policy=Policy(), records=records_2009)
    calc1.calc_all()
    sum_tables = {
        bins: create_distribution_table(calc1.records,
                                        groupby=bins,
                                        result_type='weighted_sum')
        for bins in ('small_income_bins', 'large_income_bins')
    }
    assert np.allclose(sum_tables['small_income_bins'][-1:],
                       sum_tables['large_income_bins'][-1:])
    avg_table = create_distribution_table(calc1.records,
                                          groupby='small_income_bins',
                                          result_type='weighted_avg')
    assert isinstance(avg_table, pd.DataFrame)
def test_dist_table_sum_row(cps_subsample):
    """
    Check that weighted-sum tables on expanded income have the same last
    row for small and large income bins, and that the weighted-average
    variant is returned as a DataFrame.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=records)
    calc.calc_all()

    def make_table(bins, result_type):
        """Build an expanded-income table from the calculator's records."""
        return create_distribution_table(calc.records,
                                         groupby=bins,
                                         income_measure='expanded_income',
                                         result_type=result_type)

    small_sum = make_table('small_income_bins', 'weighted_sum')
    large_sum = make_table('large_income_bins', 'weighted_sum')
    assert np.allclose(small_sum[-1:], large_sum[-1:])
    small_avg = make_table('small_income_bins', 'weighted_avg')
    assert isinstance(small_avg, pd.DataFrame)
def summary_dist_xdec(res, df1, df2):
    """
    res is dictionary of summary-results DataFrames.
    df1 contains results variables for baseline policy.
    df2 contains results variables for reform policy.
    returns augmented dictionary of summary-results DataFrames.

    Adds the keys 'dist1_xdec' (baseline) and 'dist2_xdec' (reform) to res.
    Both tables are grouped by 'weighted_deciles'; the reform table is
    sorted on the baseline's expanded_income, which is attached to df2 as
    a temporary 'expanded_income_baseline' column and removed again before
    returning, even if building the reform table raises.
    """
    # create distribution tables grouped by xdec
    res['dist1_xdec'] = create_distribution_table(
        df1, 'weighted_deciles', 'expanded_income')
    df2['expanded_income_baseline'] = df1['expanded_income']
    try:
        res['dist2_xdec'] = create_distribution_table(
            df2, 'weighted_deciles', 'expanded_income_baseline')
    finally:
        # never leave the temporary column behind on the caller's df2
        del df2['expanded_income_baseline']
    # return res dictionary
    return res
def distribution_table(self, year: int, groupby: str,
                       income_measure: str, calc: str,
                       pop_quantiles: bool = False) -> pd.DataFrame:
    """
    Method to create a distribution table

    Parameters
    ----------
    year: int
        which year the distribution table data should be from
    groupby: str
        determines how the rows in the table are sorted
        options: 'weighted_deciles', 'standard_income_bins', 'soi_agi_bins'
    income_measure: str
        determines which variable is used to sort the rows in the table
        options: 'expanded_income' or 'expanded_income_baseline'
    calc: str
        which calculator to use, can take either `'REFORM'` or `'BASE'`
        (matched without regard to letter case)
    pop_quantiles: bool
        whether or not weighted_deciles contain equal number of tax
        units (False) or people (True)

    Returns
    -------
    table: Pandas DataFrame
        distribution table

    Raises
    ------
    ValueError
        if calc is not a string equal to 'base' or 'reform'

    Notes
    -----
    The helper columns (count, count_ItemDed, count_StandardDed, count_AMT
    and, when requested, expanded_income_baseline) are assigned directly on
    the data object stored for the selected year, so they remain on that
    object after this method returns.
    """
    # normalize the selector once; anything that is not a string falls
    # through to the ValueError below instead of failing on .lower()
    selector = calc.lower() if isinstance(calc, str) else None
    # pull desired data
    if selector == "base":
        data = self.base_data[year]
    elif selector == "reform":
        data = self.reform_data[year]
    else:
        raise ValueError("calc must be either BASE or REFORM")
    # minor data preparation before calling the function
    if pop_quantiles:
        # weight scaled by XTOT
        data["count"] = data["s006"] * data["XTOT"]
    else:
        data["count"] = data["s006"]
    # each column holds the count where the condition is true, else 0
    count = data["count"]
    data["count_ItemDed"] = count.where(data["c04470"] > 0., 0.)
    data["count_StandardDed"] = count.where(data["standard"] > 0., 0.)
    data["count_AMT"] = count.where(data["c09600"] > 0., 0.)
    if income_measure == "expanded_income_baseline":
        # the baseline measure always comes from the base data, even when
        # the reform data is being tabulated
        data["expanded_income_baseline"] = (
            self.base_data[year]["expanded_income"])
    return create_distribution_table(data, groupby, income_measure,
                                     pop_quantiles)
def test_row_classifier(puf_1991, weights_1991):
    """
    Check that the weighted sum of s006 in each 'webapp_income_bins' row is
    the same for a current-law table and for a reform table that is built
    with the current-law records passed as baseline_obj.
    """
    # current-law policy, calculator and table
    base_policy = Policy()
    base_records = Records(data=puf_1991, weights=weights_1991,
                           start_year=2009)
    base_calc = Calculator(policy=base_policy, records=base_records)
    base_calc.calc_all()
    base_table = create_distribution_table(base_calc.records,
                                           groupby='webapp_income_bins',
                                           result_type='weighted_sum')
    base_s006 = base_table.s006
    # reform policy, calculator and table
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_ALD_StudentLoan_hc': [1]}})
    reform_records = Records(data=puf_1991, weights=weights_1991,
                             start_year=2009)
    reform_calc = Calculator(policy=reform_policy, records=reform_records)
    reform_calc.calc_all()
    reform_table = create_distribution_table(reform_calc.records,
                                             groupby='webapp_income_bins',
                                             result_type='weighted_sum',
                                             baseline_obj=base_calc.records)
    reform_s006 = reform_table.s006
    # use weighted sum of weights in each cell to check classifier
    assert_array_equal(base_s006, reform_s006)
def test_create_tables(puf_1991, weights_1991):
    """
    Exercise create_difference_table and create_distribution_table with
    valid and invalid arguments, before and after the two calculators are
    moved to different years.
    """
    def calculated(policy):
        """Return a Calculator for policy after calc_all has been run."""
        records = Records(data=puf_1991, weights=weights_1991,
                          start_year=2009)
        calc = Calculator(policy=policy, records=records)
        calc.calc_all()
        return calc

    # current-law calculator
    calc1 = calculated(Policy())
    # policy-reform calculator
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    calc2 = calculated(reform_policy)

    # difference tables with valid bins are DataFrames
    for bins in ('large_income_bins', 'webapp_income_bins'):
        table = create_difference_table(calc1.records, calc2.records,
                                        groupby=bins)
        assert isinstance(table, pd.DataFrame)
    # an unknown groupby is rejected
    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='bad_bins')
    # distribution tables reject unknown result_type and unknown groupby
    bad_kwargs = (
        {'groupby': 'small_income_bins', 'result_type': 'bad_result_type'},
        {'groupby': 'bad_bins', 'result_type': 'weighted_sum'},
    )
    for kwargs in bad_kwargs:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records, **kwargs)
    # distribution table of differences against a baseline
    diffs_table = create_distribution_table(calc2.records,
                                            groupby='small_income_bins',
                                            result_type='weighted_sum',
                                            baseline_obj=calc1.records,
                                            diffs=True)
    assert isinstance(diffs_table, pd.DataFrame)
    # After only calc1 is advanced a year, both table builders raise
    # (presumably because the two records objects no longer share a
    # year -- confirm against the table functions).
    calc1.increment_year()
    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='large_income_bins')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  result_type='weighted_sum',
                                  baseline_obj=calc1.records,
                                  diffs=True)
def test_create_tables(cps_subsample):
    """
    Compare columns of difference and distribution tables built from the
    records of a current-law and a reform calculator with stored expected
    values, and check that invalid arguments raise ValueError.
    """
    # pylint: disable=too-many-statements
    # current-law calculator
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform calculator (same Policy object after the reform)
    pol.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    def diff_table(bins, tax):
        """Build an expanded-income difference table for the given tax."""
        return create_difference_table(calc1.records, calc2.records,
                                       groupby=bins,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax)

    def dist_table(bins, measure, result_type='weighted_sum'):
        """Build a distribution table from the reform records."""
        return create_distribution_table(calc2.records,
                                         groupby=bins,
                                         income_measure=measure,
                                         result_type=result_type)

    def assert_column(table, column, expected, atol, nan_ok=False):
        """Require a table column to match expected within atol."""
        assert np.allclose(table[column].values, expected,
                           atol=atol, rtol=0.0, equal_nan=nan_ok)

    # test creating various difference tables
    diff = diff_table('large_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    assert_column(diff, 'perc_aftertax',
                  [0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78,
                   0.27, np.nan],
                  atol=0.005, nan_ok=True)

    diff = diff_table('webapp_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    assert_column(diff, 'perc_aftertax',
                  [0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78,
                   0.30, 0.08, 0.07, np.nan],
                  atol=0.005, nan_ok=True)

    diff = diff_table('small_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    assert_column(diff, 'perc_aftertax',
                  [0.00, 0.01, 0.02, 0.16, 0.64, 0.82, 0.87, 0.92, 1.10,
                   1.15, 1.04, 0.78, 0.30, 0.08, 0.09, 0.07, 0.05, 0.02,
                   0.00, np.nan],
                  atol=0.005, nan_ok=True)

    diff = diff_table('weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    assert_column(diff, 'tot_change',
                  [14931, 276555, 7728872, 22552703, 34008512, 50233787,
                   76811377, 111167087, 123226970, 111414038, 537434832,
                   66560891, 39571078, 5282069],
                  atol=0.5)
    assert_column(diff, 'share_of_change',
                  [0.00, 0.05, 1.44, 4.20, 6.33, 9.35, 14.29, 20.68,
                   22.93, 20.73, 100.00, 12.38, 7.36, 0.98],
                  atol=0.005)
    assert_column(diff, 'perc_aftertax',
                  [0.00, 0.02, 0.35, 0.79, 0.89, 0.97, 1.11, 1.18, 0.91,
                   0.50, np.nan, 0.70, 0.37, 0.06],
                  atol=0.005, nan_ok=True)
    assert_column(diff, 'pc_aftertaxinc',
                  [-0.00, -0.02, -0.35, -0.79, -0.89, -0.97, -1.11, -1.18,
                   -0.91, -0.50, np.nan, -0.70, -0.37, -0.06],
                  atol=0.005, nan_ok=True)

    with pytest.raises(ValueError):
        diff_table('bad_bins', 'iitax')

    # test creating various distribution tables
    dist = dist_table('weighted_deciles', 'expanded_income')
    assert isinstance(dist, pd.DataFrame)
    assert_column(dist, 'iitax',
                  [-8851215, -99666120, -123316561, -85895787, -47357458,
                   207462144, 443391189, 978487989, 1709504845,
                   7631268907, 10605027933, 4171055704, 2751003155,
                   709210048],
                  atol=0.5)
    assert_column(dist, 'num_returns_ItemDed',
                  [1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788,
                   131260, 146001, 583832, 75279, 56819, 13903],
                  atol=0.5)
    assert_column(dist, 'expanded_income',
                  [158456013, 1351981790, 2383726863, 3408544081,
                   4569232020, 6321944661, 8520304098, 11817197884,
                   17299173380, 41117720202, 96948280992, 21687950798,
                   15093608351, 4336161053],
                  atol=0.5)
    assert_column(dist, 'aftertax_income',
                  [147367698, 1354827269, 2351611947, 3192405234,
                   4157431713, 5454468907, 7125788590, 9335613303,
                   13417244946, 29691084873, 76227844481, 15608893056,
                   10854804442, 3227387375],
                  atol=0.5)

    dist = dist_table('webapp_income_bins', 'expanded_income')
    assert isinstance(dist, pd.DataFrame)
    assert_column(dist, 'iitax',
                  [-103274, -83144506, -152523834, -129881470, 85802556,
                   255480678, 832529135, 1066963515, 3023956558,
                   2876331264, 1008672459, 1820944852, 10605027933],
                  atol=0.5)
    assert_column(dist, 'num_returns_ItemDed',
                  [0, 1202, 22654, 31665, 30547, 49851, 124786, 97349,
                   160147, 56806, 5803, 3023, 583832],
                  atol=0.5)

    # give the reform records a baseline income measure equal to their
    # own expanded_income and tabulate on it
    setattr(calc2.records, 'expanded_income_baseline',
            getattr(calc2.records, 'expanded_income'))
    dist = dist_table('webapp_income_bins', 'expanded_income_baseline')
    assert isinstance(dist, pd.DataFrame)

    with pytest.raises(ValueError):
        dist_table('small_income_bins', 'expanded_income',
                   result_type='bad_result_type')
    with pytest.raises(ValueError):
        dist_table('bad_bins', 'expanded_income')
def test_create_tables(cps_subsample):
    """
    Compare columns of difference and distribution tables with stored
    expected values.

    Every comparison is run even after an earlier one has failed; for each
    mismatch the actual column values are printed, and the test fails at
    the end if any comparison did not match.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # current-law calculator
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform calculator (same Policy object after the reform)
    pol.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    mismatches = []

    def check(kind, table, column, expected, atol, fmt, nan_ok=False):
        """Record and print a mismatch between a column and expected."""
        if np.allclose(table[column].values, expected,
                       atol=atol, rtol=0.0, equal_nan=nan_ok):
            return
        mismatches.append((kind, column))
        print(kind, column)
        for val in table[column].values:
            print(fmt.format(val))

    def diff_table(bins, tax):
        """Build an expanded-income difference table for the given tax."""
        return create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                       calc2.dataframe(DIFF_VARIABLES),
                                       groupby=bins,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax)

    # test creating various difference tables
    diff = diff_table('large_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80,
           -0.75, -0.65, -0.18, -0.59],
          atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = diff_table('standard_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80,
           -0.75, -0.65, -0.23, -0.09, -0.06, -0.59],
          atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = diff_table('small_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.30, -0.10, -0.24, -0.76, -0.67, -0.75,
           -0.69, -0.82, -0.80, -0.75, -0.65, -0.23, -0.09, -0.08, -0.07,
           -0.05, -0.02, np.nan, -0.59],
          atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = diff_table('weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'tot_change',
          [0, 0, 1219678, 15503037, 25922077, 35000592, 48336897,
           62637728, 79750078, 93136108, 116996252, 102458801, 580961247,
           63156380, 33664610, 5637811],
          atol=0.51, fmt='{:.0f},')
    check('diff', diff, 'share_of_change',
          [0.00, 0.00, 0.21, 2.67, 4.46, 6.02, 8.32, 10.78, 13.73, 16.03,
           20.14, 17.64, 100.00, 10.87, 5.79, 0.97],
          atol=0.005, fmt='{:.2f},')
    decile_pc_aftertaxinc = [
        np.nan, np.nan, -0.15, -0.62, -0.70, -0.73, -0.78, -0.80, -0.80,
        -0.74, -0.71, -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check('diff', diff, 'pc_aftertaxinc', decile_pc_aftertaxinc,
          atol=0.005, fmt='{:.2f},', nan_ok=True)
    # the same column is compared a second time with the same values
    check('diff', diff, 'pc_aftertaxinc', decile_pc_aftertaxinc,
          atol=0.005, fmt='{:.2f},', nan_ok=True)

    # test creating various distribution tables
    dvdf = calc2.distribution_table_dataframe()
    dvdf = add_quantile_table_row_variable(dvdf, 'expanded_income',
                                           num_quantiles=10,
                                           decile_details=True)
    dist = create_distribution_table(dvdf,
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    check('dist', dist, 'iitax',
          [0, 0, -53644343, -65258622, -57617119, 37391333, 200879230,
           329784586, 553827330, 1015854407, 1731283600, 7090603505,
           10783103907, 1638192777, 2213960052, 3238450675],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 0, 2561, 12610, 21936, 29172, 50890, 61563, 78247, 91823,
           118523, 128886, 596211, 63986, 51634, 13266],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'expanded_income',
          [0, 0, 835224673, 2639667638, 3940559051, 5286856071,
           6972849344, 8881099529, 11467767759, 14761195525, 19832126806,
           44213000235, 118830346631, 14399218059, 16868648076,
           12945134101],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'aftertax_income',
          [0, 0, 818813684, 2466000535, 3671150517, 4790979126,
           6173998985, 7754183496, 9907604744, 12510477225, 16273592612,
           33915377411, 98282178334, 11345456373, 13400757263,
           9169163776],
          atol=0.5, fmt='{:.0f},')

    dist = create_distribution_table(calc2.distribution_table_dataframe(),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    check('dist', dist, 'iitax',
          [0, 0, -42244205, -76727831, -62581860, 53797887, 217016689,
           723516183, 1108097059, 3272479928, 2818979541, 950296405,
           1820474110, 10783103907],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 0, 1202, 13614, 27272, 34407, 48265, 117225, 103319, 181885,
           61014, 5126, 2882, 596211],
          atol=0.5, fmt='{:.0f},')

    # fail only after every comparison has had a chance to print
    assert not mismatches
def test_create_tables(cps_subsample):
    """
    Compare columns of difference and distribution tables, built from
    calculator dataframes, with stored expected values.

    All comparisons are attempted; each mismatch prints the actual column
    values, and a single failure is raised at the end if any comparison
    did not match.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # current-law calculator
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform calculator (same Policy object after the reform)
    pol.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failed_columns = []

    def compare(kind, table, column, expected, atol, fmt, nan_ok=False):
        """Print the actual column and note the failure on a mismatch."""
        matches = np.allclose(table[column].values, expected,
                              atol=atol, rtol=0.0, equal_nan=nan_ok)
        if not matches:
            failed_columns.append(column)
            print(kind, column)
            for val in table[column].values:
                print(fmt.format(val))

    def make_diff(bins, tax):
        """Build an expanded-income difference table for the given tax."""
        return create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                       calc2.dataframe(DIFF_VARIABLES),
                                       groupby=bins,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax)

    def make_dist(bins):
        """Build a weighted-sum distribution table from the reform."""
        return create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                         groupby=bins,
                                         income_measure='expanded_income',
                                         result_type='weighted_sum')

    # test creating various difference tables
    diff = make_diff('large_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    compare('diff', diff, 'pc_aftertaxinc',
            [0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65,
             -0.18, -0.59],
            atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = make_diff('standard_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    compare('diff', diff, 'pc_aftertaxinc',
            [0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65,
             -0.23, -0.09, -0.06, -0.59],
            atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = make_diff('small_income_bins', 'iitax')
    assert isinstance(diff, pd.DataFrame)
    compare('diff', diff, 'pc_aftertaxinc',
            [0.00, -0.29, -0.07, -0.22, -0.80, -0.65, -0.74, -0.71, -0.81,
             -0.83, -0.74, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05,
             -0.02, 0.00, -0.59],
            atol=0.005, fmt='{:.2f},', nan_ok=True)

    diff = make_diff('weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    compare('diff', diff, 'tot_change',
            [0, 855188, 15425829, 26212078, 33369237, 50208703, 63312937,
             82312360, 90711899, 117518598, 101779164, 581705993,
             62142547, 33919755, 5716862],
            atol=0.51, fmt='{:.0f},')
    compare('diff', diff, 'share_of_change',
            [0.00, 0.15, 2.65, 4.51, 5.74, 8.63, 10.88, 14.15, 15.59,
             20.20, 17.50, 100.00, 10.68, 5.83, 0.98],
            atol=0.005, fmt='{:.2f},')
    decile_pc_aftertaxinc = [
        0.00, -0.11, -0.62, -0.71, -0.69, -0.81, -0.82, -0.82, -0.72,
        -0.71, -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    compare('diff', diff, 'pc_aftertaxinc', decile_pc_aftertaxinc,
            atol=0.005, fmt='{:.2f},', nan_ok=True)
    # the same column is compared a second time with the same values
    compare('diff', diff, 'pc_aftertaxinc', decile_pc_aftertaxinc,
            atol=0.005, fmt='{:.2f},', nan_ok=True)

    # test creating various distribution tables
    dist = make_dist('weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    compare('dist', dist, 'iitax',
            [0, -56140397, -67237556, -58897159, 17222017, 212673684,
             328116256, 573255089, 992965515, 1730626734, 7142993526,
             10815577709, 1625179635, 2241659962, 3276153930],
            atol=0.5, fmt='{:.0f},')
    compare('dist', dist, 'num_returns_ItemDed',
            [0, 1202, 13981, 21932, 27445, 52318, 62509, 79749, 91861,
             117068, 129463, 597527, 63940, 52137, 13387],
            atol=0.5, fmt='{:.0f},')
    compare('dist', dist, 'expanded_income',
            [0, 812766585, 2639118220, 3940557055, 5243088362, 6988752253,
             8827238879, 11605062543, 14729565181, 19894042635,
             44374875397, 119055067109, 14255277238, 17039539254,
             13080058905],
            atol=0.5, fmt='{:.0f},')
    compare('dist', dist, 'aftertax_income',
            [0, 801755209, 2466382489, 3674186760, 4779876836, 6150380331,
             7701226391, 10000914935, 12515316309, 16352910962,
             34006973974, 98449924197, 11219604941, 13525917494,
             9261451538],
            atol=0.5, fmt='{:.0f},')

    dist = make_dist('standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    compare('dist', dist, 'iitax',
            [0, -44670465, -79534586, -61791623, 34666275, 216487136,
             742113595, 1099657851, 3270948526, 2826393721, 962881064,
             1848426216, 10815577709],
            atol=0.5, fmt='{:.0f},')
    compare('dist', dist, 'num_returns_ItemDed',
            [0, 1202, 13625, 27355, 33694, 50236, 116751, 104035, 181572,
             60936, 5196, 2924, 597527],
            atol=0.5, fmt='{:.0f},')

    # fail only after every comparison has had a chance to print
    assert not failed_columns
def dropq_summary(df1, df2, mask):
    """
    Summarize baseline and reform results after drop_records is applied.

    df1 contains raw results for the standard plan X and X'
    df2 contains raw results the user-specified plan (Plan Y)
    mask is the boolean mask where X and X' match

    Returns a 19-item tuple: the five decile tables (reform distribution,
    baseline distribution, then iitax, payrolltax and combined difference
    tables), the same five tables grouped by webapp income bins, the three
    weighted decile difference totals, the three weighted baseline totals
    and the three weighted reform totals.
    """
    # pylint: disable=too-many-locals
    df1, df2 = drop_records(df1, df2, mask)

    def weighted_total(frame, column):
        """Return the s006-weighted sum of one column of frame."""
        return (frame[column] * frame['s006']).sum()

    def diff_table(grouping, res_col, diff_col, suffix, total):
        """Build one difference table between df1 and df2."""
        return dropq_diff_table(df1, df2,
                                groupby=grouping,
                                res_col=res_col,
                                diff_col=diff_col,
                                suffix=suffix,
                                wsum=total)

    # totals for diff between baseline and reform
    dec_sum = weighted_total(df2, 'tax_diff_dec')
    bin_sum = weighted_total(df2, 'tax_diff_bin')
    pr_dec_sum = weighted_total(df2, 'payrolltax_diff_dec')
    pr_bin_sum = weighted_total(df2, 'payrolltax_diff_bin')
    combined_dec_sum = weighted_total(df2, 'combined_diff_dec')
    combined_bin_sum = weighted_total(df2, 'combined_diff_bin')

    # totals for baseline
    sum_baseline = weighted_total(df1, 'iitax')
    pr_sum_baseline = weighted_total(df1, 'payrolltax')
    combined_sum_baseline = weighted_total(df1, 'combined')

    # totals for reform
    sum_reform = weighted_total(df2, 'iitax_dec')
    pr_sum_reform = weighted_total(df2, 'payrolltax_dec')
    combined_sum_reform = weighted_total(df2, 'combined_dec')

    # difference tables, grouped by deciles and by bins
    diffs_dec = diff_table('weighted_deciles', 'tax_diff', 'iitax',
                           '_dec', dec_sum)
    diffs_bin = diff_table('webapp_income_bins', 'tax_diff', 'iitax',
                           '_bin', bin_sum)
    pr_diffs_dec = diff_table('weighted_deciles', 'payrolltax_diff',
                              'payrolltax', '_dec', pr_dec_sum)
    pr_diffs_bin = diff_table('webapp_income_bins', 'payrolltax_diff',
                              'payrolltax', '_bin', pr_bin_sum)
    comb_diffs_dec = diff_table('weighted_deciles', 'combined_diff',
                                'combined', '_dec', combined_dec_sum)
    comb_diffs_bin = diff_table('webapp_income_bins', 'combined_diff',
                                'combined', '_bin', combined_bin_sum)

    # distribution tables: m1 is built from df1, m2 from df2
    m1_dec = create_distribution_table(df1,
                                       groupby='weighted_deciles',
                                       result_type='weighted_sum')
    m2_dec = dropq_dist_table(df2,
                              groupby='weighted_deciles',
                              result_type='weighted_sum',
                              suffix='_dec')
    m1_bin = create_distribution_table(df1,
                                       groupby='webapp_income_bins',
                                       result_type='weighted_sum')
    m2_bin = dropq_dist_table(df2,
                              groupby='webapp_income_bins',
                              result_type='weighted_sum',
                              suffix='_bin')

    decile_tables = (m2_dec, m1_dec, diffs_dec, pr_diffs_dec,
                     comb_diffs_dec)
    bin_tables = (m2_bin, m1_bin, diffs_bin, pr_diffs_bin, comb_diffs_bin)
    totals = (dec_sum, pr_dec_sum, combined_dec_sum,
              sum_baseline, pr_sum_baseline, combined_sum_baseline,
              sum_reform, pr_sum_reform, combined_sum_reform)
    return decile_tables + bin_tables + totals
def summary(df1, df2, mask):
    """
    Build the dictionary of summary-results DataFrames.

    df1 contains raw results for baseline plan
    df2 contains raw results for reform plan
    mask is the boolean array specifying records with reform-induced
      tax diffs
    returns dictionary of summary results DataFrames

    The returned dictionary holds the aggregate tables 'aggr_d', 'aggr_1'
    and 'aggr_2', six difference tables ('diff_{itax,ptax,comb}_{xdec,xbin}')
    and four distribution tables ('dist{1,2}_{xdec,xbin}'). The '_xbin'
    tables have their first row dropped.
    """
    # pylint: disable=too-many-statements,too-many-locals
    df2 = create_results_columns(df1, df2, mask)
    taxes = ('iitax', 'payrolltax', 'combined')
    # (key fragment used in the result name, tax column name)
    tax_keys = (('itax', 'iitax'), ('ptax', 'payrolltax'),
                ('comb', 'combined'))
    summ = dict()

    # tax difference totals between reform and baseline
    aggrd = [((df2[tax + '_agg'] - df1[tax]) * df2['s006']).sum()
             for tax in taxes]
    summ['aggr_d'] = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
    # totals for baseline
    aggr1 = [(df1[tax] * df1['s006']).sum() for tax in taxes]
    summ['aggr_1'] = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)
    # totals for reform
    aggr2 = [(df2[tax + '_agg'] * df2['s006']).sum() for tax in taxes]
    summ['aggr_2'] = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)

    # create difference tables grouped by xdec; each tax column of df2 is
    # overwritten with its '_xdec' variant just before its table is built
    for short, tax in tax_keys:
        df2[tax] = df2[tax + '_xdec']
        summ['diff_' + short + '_xdec'] = create_difference_table(
            df1, df2,
            groupby='weighted_deciles',
            income_measure='expanded_income',
            tax_to_diff=tax)

    # create difference tables grouped by xbin (removing negative-income
    # bin, which is the first row that gets dropped)
    for short, tax in tax_keys:
        df2[tax] = df2[tax + '_xbin']
        diff_xbin = create_difference_table(
            df1, df2,
            groupby='standard_income_bins',
            income_measure='expanded_income',
            tax_to_diff=tax)
        diff_xbin.drop(diff_xbin.index[0], inplace=True)
        summ['diff_' + short + '_xbin'] = diff_xbin

    def use_suffixed_columns(suffix):
        """Copy every df2 column ending in suffix onto its root name."""
        for col in [name for name in list(df2) if name.endswith(suffix)]:
            df2[col.replace(suffix, '')] = df2[col]

    # create distribution tables grouped by xdec
    summ['dist1_xdec'] = create_distribution_table(
        df1,
        groupby='weighted_deciles',
        income_measure='expanded_income',
        result_type='weighted_sum')
    use_suffixed_columns('_xdec')
    # the reform table is sorted on the baseline's expanded income
    df2['expanded_income_baseline'] = df1['expanded_income']
    summ['dist2_xdec'] = create_distribution_table(
        df2,
        groupby='weighted_deciles',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum')

    # create distribution tables grouped by xbin (removing negative-income
    # bin, which is the first row that gets dropped)
    dist1_xbin = create_distribution_table(
        df1,
        groupby='standard_income_bins',
        income_measure='expanded_income',
        result_type='weighted_sum')
    dist1_xbin.drop(dist1_xbin.index[0], inplace=True)
    summ['dist1_xbin'] = dist1_xbin
    use_suffixed_columns('_xbin')
    df2['expanded_income_baseline'] = df1['expanded_income']
    dist2_xbin = create_distribution_table(
        df2,
        groupby='standard_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum')
    dist2_xbin.drop(dist2_xbin.index[0], inplace=True)
    summ['dist2_xbin'] = dist2_xbin

    # return dictionary of summary results
    return summ
def test_create_tables(cps_subsample):
    """
    Check selected columns of difference and distribution tables.

    A current-law Calculator (calc1) and a reform Calculator (calc2, with
    '_II_rt1' set to 0.15 beginning in 2013) are built from cps_subsample.
    Tables are then created for several groupby options and individual
    columns are compared with stored expected values.

    Every mismatching column is reported (its actual values are printed in
    a form that can be pasted back into this test) before the test fails,
    so a single run shows all columns that need attention.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # labels of all table columns whose values differ from expectations
    failures = []

    def check_column(kind, table, tabcol, expected, atol, fmt,
                     equal_nan=False):
        """
        Compare table[tabcol] with expected using an absolute tolerance.

        On a mismatch, record the column in failures and print the actual
        values using fmt, one value per line.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(kind, tabcol))
        print(kind, tabcol)
        for val in actual:
            print(fmt.format(val))

    def difference_table(groupby, tax_to_diff):
        """
        Return a difference table built from newly extracted dataframes.
        """
        # new dataframes are extracted for every table, as in the original
        # test; presumably the table builder may modify its inputs
        # (TODO confirm)
        table = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                        calc2.dataframe(DIFF_VARIABLES),
                                        groupby=groupby,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax_to_diff)
        assert isinstance(table, pd.DataFrame)
        return table

    def distribution_table(groupby):
        """
        Return a reform distribution table of weighted sums.
        """
        table = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                          groupby=groupby,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        return table

    # test creating various difference tables

    diff = difference_table('large_income_bins', 'combined')
    expected = [np.nan, np.nan,
                -0.14, -0.58, -0.71, -0.70, -0.83,
                -0.81, -0.73, -0.65, -0.18, -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('standard_income_bins', 'iitax')
    expected = [np.nan, np.nan,
                -0.14, -0.58, -0.71, -0.70, -0.83, -0.81,
                -0.73, -0.65, -0.23, -0.09, -0.06, -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('small_income_bins', 'iitax')
    expected = [np.nan, np.nan,
                -0.29, -0.07, -0.23, -0.78, -0.66, -0.74, -0.70,
                -0.83, -0.81, -0.73, -0.65, -0.23, -0.09, -0.08,
                -0.07, -0.05, -0.02,
                np.nan,
                -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('weighted_deciles', 'combined')
    expected = [0, 0,
                1037894, 16199646, 25518793, 34455230, 49661093,
                62344194, 82290396, 90006817, 117415735, 101818106,
                580747904,
                62408600, 33771695, 5637811]
    check_column('diff', diff, 'tot_change', expected,
                 atol=0.51, fmt='{:.0f},')
    expected = [0.00, 0.00,
                0.18, 2.79, 4.39, 5.93, 8.55,
                10.74, 14.17, 15.50, 20.22, 17.53,
                100.00,
                10.75, 5.82, 0.97]
    check_column('diff', diff, 'share_of_change', expected,
                 atol=0.005, fmt='{:.2f},')
    # this column was previously checked twice in a row with identical
    # expected values; a single check is sufficient
    expected = [np.nan, np.nan,
                -0.13, -0.65, -0.68, -0.71, -0.79,
                -0.80, -0.82, -0.71, -0.71, -0.30,
                -0.59,
                -0.55, -0.25, -0.06]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = distribution_table('weighted_deciles')
    expected = [0, 0,
                -54678669, -64005792, -64426464, 32739840, 207396898,
                317535861, 575238615, 984782596, 1731373913, 7082515174,
                10748471972,
                1622921432, 2217477146, 3242116596]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [0, 0,
                2561, 13268, 21368, 28377, 53186,
                60433, 79779, 91010, 117445, 128784,
                596211,
                63766, 51681, 13337]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [0, 0,
                836765692, 2661991174, 3978757611, 5306258004,
                7022134388, 8871843614, 11530190180, 14721635194,
                19860290487, 44177752076,
                118967618420,
                14296456955, 16895894429, 12985400692]
    check_column('dist', dist, 'expanded_income', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [0, 0,
                821526457, 2483359936, 3714540881, 4821394144,
                6200512981, 7763298300, 9921184240, 12527297334,
                16314596486, 33886371300,
                98454082058,
                11265497052, 13416447851, 9204426396]
    check_column('dist', dist, 'aftertax_income', expected,
                 atol=0.5, fmt='{:.0f},')

    dist = distribution_table('standard_income_bins')
    expected = [0, 0,
                -43150804, -77526808, -64845122, 43303823, 225370761,
                723847940, 1098042284, 3264499170, 2808160213,
                950296405, 1820474110,
                10748471972]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [0, 0,
                1202, 13614, 27319, 33655, 50186, 116612,
                103896, 181192, 60527, 5126, 2882,
                596211]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')

    # fail only after every column has been checked and reported
    assert not failures, (
        'unexpected table values in: ' + '; '.join(failures)
    )
def distribution_tables(self, calc, groupby, averages=False, scaling=True):
    """
    Get results from self and calc, sort them by GTI into table rows
    defined by groupby, compute grouped statistics, and return tables
    as a pair of Pandas dataframes.

    This method leaves the Calculator object(s) unchanged.
    Note that the returned tables have consistent income groups (based
    on the self GTI) even though the baseline GTI in self and the
    reform GTI in calc are different.

    Parameters
    ----------
    calc : Calculator object or None
        typically represents the reform while self represents the baseline;
        if calc is None, the second returned table is None

    groupby : String object
        options for input: 'weighted_deciles', 'standard_income_bins'
        determines how the rows in the resulting Pandas DataFrame
        are grouped

    averages : boolean
        specifies whether or not monetary table entries are aggregates or
        averages (default value of False implies entries are aggregates)

    scaling : boolean
        specifies whether or not monetary table entries are scaled to
        billions and rounded to three decimal places when averages=False,
        or when averages=True, to thousands and rounded to three decimal
        places.  Regardless of the value of averages, non-monetary table
        entries are scaled to millions and rounded to three decimal places
        (default value of True implies entries are scaled and rounded)

    Return and typical usage
    ------------------------
    dist1, dist2 = calc1.distribution_tables(calc2, 'weighted_deciles')
    OR
    dist1, _ = calc1.distribution_tables(None, 'weighted_deciles')
    (where calc1 is a baseline Calculator object
    and calc2 is a reform Calculator object).
    Each of the dist1 and optional dist2 is a distribution table as a
    Pandas DataFrame with DIST_TABLE_COLUMNS and groupby rows.
    NOTE: when groupby is 'weighted_deciles', the returned tables have 3
          extra rows containing top-decile detail consisting of statistics
          for the 0.90-0.95 quantile range (bottom half of top decile),
          for the 0.95-0.99 quantile range, and
          for the 0.99-1.00 quantile range (top one percent); and the
          returned table splits the bottom decile into filing units with
          negative (denoted by a 0-10n row label),
          zero (denoted by a 0-10z row label), and
          positive (denoted by a 0-10p row label) values of GTI.
    """
    # nested function used only by this method
    def have_same_income_measure(calc1, calc2):
        """
        Return true if calc1 and calc2 contain the same GTI;
        otherwise, return false.  (Note that "same" means nobody's
        GTI differs by more than one cent.)
        """
        im1 = calc1.array('GTI')
        im2 = calc2.array('GTI')
        return np.allclose(im1, im2, rtol=0.0, atol=0.01)

    # main logic of method
    assert calc is None or isinstance(calc, Calculator)
    assert groupby in ('weighted_deciles', 'standard_income_bins')
    if calc is not None:
        # check year and size first: comparing weight arrays of different
        # lengths would otherwise fail inside NumPy with a broadcasting
        # error instead of failing one of these assertions
        assert calc.current_year == self.current_year
        assert calc.array_len == self.array_len
        # rows in same order
        assert np.allclose(self.array('weight'), calc.array('weight'))
    # baseline table is always grouped by the baseline GTI
    var_dataframe = self.distribution_table_dataframe()
    imeasure = 'GTI'
    dt1 = create_distribution_table(var_dataframe, groupby, imeasure,
                                    averages, scaling)
    del var_dataframe
    if calc is None:
        return (dt1, None)
    var_dataframe = calc.distribution_table_dataframe()
    if not have_same_income_measure(self, calc):
        # group the calc results by the self (baseline) GTI so that both
        # tables use the same income groups
        imeasure = 'GTI_baseline'
        var_dataframe[imeasure] = self.array('GTI')
    dt2 = create_distribution_table(var_dataframe, groupby, imeasure,
                                    averages, scaling)
    del var_dataframe
    return (dt1, dt2)
def test_create_tables(cps_subsample):
    """
    Exercise create_difference_table and create_distribution_table.

    Baseline and reform Calculator objects (the reform sets '_II_rt1' to
    0.15 beginning in 2013) are each built from their own Records object,
    after which tables are created from the two Records objects and
    selected columns are compared with stored expected values.  Invalid
    groupby and result_type arguments are expected to raise ValueError.
    """
    # baseline (current-law) calculator
    base_calc = Calculator(
        policy=Policy(),
        records=Records.cps_constructor(data=cps_subsample)
    )
    base_calc.calc_all()
    # reform calculator
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    reform_calc = Calculator(
        policy=reform_policy,
        records=Records.cps_constructor(data=cps_subsample)
    )
    reform_calc.calc_all()
    base_recs = base_calc.records
    reform_recs = reform_calc.records

    # difference tables: (groupby, tax_to_diff, expected perc_aftertax)
    difference_cases = [
        ('large_income_bins', 'combined',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%',
          '1.15%', '1.04%', '0.78%', '0.27%', 'n/a']),
        ('webapp_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%', '1.15%',
          '1.04%', '0.78%', '0.30%', '0.08%', '0.07%', 'n/a']),
        ('small_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.02%', '0.16%', '0.64%', '0.82%', '0.87%',
          '0.92%', '1.10%', '1.15%', '1.04%', '0.78%', '0.30%', '0.08%',
          '0.09%', '0.07%', '0.05%', '0.02%', '0.00%', 'n/a']),
        ('weighted_deciles', 'combined',
         ['0.00%', '0.02%', '0.35%', '0.79%', '0.89%', '0.97%',
          '1.11%', '1.18%', '0.91%', '0.50%', 'n/a']),
    ]
    for bins, tax, want in difference_cases:
        table = create_difference_table(base_recs, reform_recs,
                                        groupby=bins,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax)
        assert isinstance(table, pd.DataFrame)
        assert np.array_equal(table['perc_aftertax'], want)

    # an unknown groupby value is rejected
    with pytest.raises(ValueError):
        create_difference_table(base_recs, reform_recs,
                                groupby='bad_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    # distribution tables:
    # (groupby, expected iitax, expected num_returns_ItemDed)
    distribution_cases = [
        ('weighted_deciles',
         [-8851215, -99666120, -123316561, -85895787, -47357458,
          207462144, 443391189, 978487989, 1709504845, 7631268907,
          10605027933],
         [1202, 1688, 13506, 18019, 30130, 48244,
          80994, 112788, 131260, 146001, 583832]),
        ('webapp_income_bins',
         [-103274, -83144506, -152523834, -129881470, 85802556,
          255480678, 832529135, 1066963515, 3023956558, 2876331264,
          1008672459, 1820944852, 10605027933],
         [0, 1202, 22654, 31665, 30547, 49851, 124786,
          97349, 160147, 56806, 5803, 3023, 583832]),
    ]
    for bins, want_iitax, want_itemded in distribution_cases:
        table = create_distribution_table(reform_recs,
                                          groupby=bins,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        assert np.allclose(table['iitax'], want_iitax,
                           atol=0.5, rtol=0.0)
        assert np.allclose(table['num_returns_ItemDed'].tolist(),
                           want_itemded, atol=0.5, rtol=0.0)

    # a table can also be grouped by a baseline income measure
    reform_recs.expanded_income_baseline = reform_recs.expanded_income
    table = create_distribution_table(
        reform_recs,
        groupby='webapp_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum'
    )
    assert isinstance(table, pd.DataFrame)

    # invalid arguments are rejected: (groupby, result_type)
    invalid_cases = [
        ('small_income_bins', 'bad_result_type'),
        ('bad_bins', 'weighted_sum'),
    ]
    for bins, result_kind in invalid_cases:
        with pytest.raises(ValueError):
            create_distribution_table(reform_recs,
                                      groupby=bins,
                                      income_measure='expanded_income',
                                      result_type=result_kind)