def test_diff_table_sum_row(puf_1991, weights_1991):
    """
    Check that the last row of a difference table is the same whether
    the rows are grouped by small or by large income bins.
    """
    # baseline: current-law policy
    base_policy = Policy()
    base_records = Records(data=puf_1991, weights=weights_1991,
                           start_year=2009)
    calc1 = Calculator(policy=base_policy, records=base_records)
    calc1.calc_all()
    # reform: change one income tax rate starting in 2013
    reform = {2013: {'_II_rt4': [0.56]}}
    reform_policy = Policy()
    reform_policy.implement_reform(reform)
    reform_records = Records(data=puf_1991, weights=weights_1991,
                             start_year=2009)
    calc2 = Calculator(policy=reform_policy, records=reform_records)
    calc2.calc_all()
    # build one difference table per binning scheme
    by_small_bins = create_difference_table(calc1.records, calc2.records,
                                            groupby='small_income_bins')
    by_large_bins = create_difference_table(calc1.records, calc2.records,
                                            groupby='large_income_bins')
    # columns compared exactly; all remaining columns are compared
    # with np.allclose
    non_digit_cols = [
        'mean', 'perc_inc', 'perc_cut', 'share_of_change', 'aftertax_perc'
    ]
    digit_cols = []
    for colname in by_small_bins.columns.tolist():
        if colname not in non_digit_cols:
            digit_cols.append(colname)
    # compare only the last row of each table
    small_digits = by_small_bins[digit_cols][-1:]
    large_digits = by_large_bins[digit_cols][-1:]
    assert np.allclose(small_digits, large_digits)
    small_others = by_small_bins[non_digit_cols][-1:]
    large_others = by_large_bins[non_digit_cols][-1:]
    assert_array_equal(small_others, large_others)
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the 'ALL' row of a difference table does not depend on
    the row grouping, and that the population count exceeds the
    filing-unit count when weighted deciles are population based.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator uses current-law policy
    calc1 = Calculator(policy=policy, records=recs)
    calc1.calc_all()
    # reform Calculator uses the policy after the reform is implemented
    policy.implement_reform({'II_rt4': {2013: 0.56}})
    calc2 = Calculator(policy=policy, records=recs)
    calc2.calc_all()
    # extract the variables needed by the difference tables just once
    base_vars = calc1.dataframe(DIFF_VARIABLES)
    reform_vars = calc2.dataframe(DIFF_VARIABLES)
    # create four difference tables that differ only in row grouping
    by_std_bins = create_difference_table(
        base_vars, reform_vars, 'standard_income_bins', 'iitax'
    )
    by_soi_bins = create_difference_table(
        base_vars, reform_vars, 'soi_agi_bins', 'iitax'
    )
    by_unit_deciles = create_difference_table(
        base_vars, reform_vars, 'weighted_deciles', 'iitax',
        pop_quantiles=False
    )
    by_pop_deciles = create_difference_table(
        base_vars, reform_vars, 'weighted_deciles', 'iitax',
        pop_quantiles=True
    )
    # the 'ALL' row must agree across the filing-unit based tables
    assert np.allclose(by_std_bins.loc['ALL'], by_soi_bins.loc['ALL'])
    assert np.allclose(by_std_bins.loc['ALL'], by_unit_deciles.loc['ALL'])
    # make sure population count is larger than filing-unit count
    pop_count = by_pop_deciles.at['ALL', 'count']
    unit_count = by_std_bins.at['ALL', 'count']
    assert pop_count > unit_count
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the last row of a difference table is the same whether
    the rows are grouped by small or by large income bins.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator uses current-law policy
    calc1 = Calculator(policy=policy, records=recs)
    calc1.calc_all()
    # reform Calculator uses the policy after the reform is implemented
    policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    calc2 = Calculator(policy=policy, records=recs)
    calc2.calc_all()
    # build one difference table per binning scheme
    by_small_bins = create_difference_table(
        calc1.records, calc2.records,
        groupby='small_income_bins',
        income_measure='expanded_income',
        tax_to_diff='iitax'
    )
    by_large_bins = create_difference_table(
        calc1.records, calc2.records,
        groupby='large_income_bins',
        income_measure='expanded_income',
        tax_to_diff='iitax'
    )
    # columns compared exactly; all remaining columns are compared
    # with np.allclose
    non_digit_cols = [
        'mean', 'perc_inc', 'perc_cut', 'share_of_change',
        'perc_aftertax', 'pc_aftertaxinc'
    ]
    digit_cols = []
    for colname in list(by_small_bins):
        if colname not in non_digit_cols:
            digit_cols.append(colname)
    # compare only the last row of each table
    small_digits = by_small_bins[digit_cols][-1:]
    large_digits = by_large_bins[digit_cols][-1:]
    assert np.allclose(small_digits, large_digits)
    small_others = by_small_bins[non_digit_cols][-1:]
    large_others = by_large_bins[non_digit_cols][-1:]
    np.testing.assert_array_equal(small_others, large_others)
def differences_table(self, year: int, groupby: str, tax_to_diff: str,
                      pop_quantiles: bool = False) -> pd.DataFrame:
    """
    Build a differences table from the stored baseline and reform data.

    Parameters
    ----------
    year: year whose baseline and reform data are compared
    groupby: determines how the rows in the table are grouped
        options: 'weighted_deciles', 'standard_income_bins',
        'soi_agi_bins'
    tax_to_diff: which tax to take the difference of
        options: 'iitax', 'payrolltax', 'combined'
    pop_quantiles: whether weighted_deciles contain an equal number
        of tax units (False) or people (True)

    Returns
    -------
    DataFrame containing a differences table
    """
    return create_difference_table(
        self.base_data[year],
        self.reform_data[year],
        groupby,
        tax_to_diff,
        pop_quantiles
    )
def summary_diff_xdec(res, df1, df2):
    """
    Add weighted-decile difference tables to the res dictionary.

    res is dictionary of summary-results DataFrames.
    df1 contains results variables for baseline policy.
    df2 contains results variables for reform policy.
    returns the same res dictionary augmented with three difference
    tables, one for each of the individual income tax, the payroll
    tax, and the combined taxes.
    """
    # (res key, tax to difference) pairs in the order they are added
    table_specs = (
        ('diff_itax_xdec', 'iitax'),
        ('diff_ptax_xdec', 'payrolltax'),
        ('diff_comb_xdec', 'combined'),
    )
    for res_key, tax_name in table_specs:
        res[res_key] = create_difference_table(
            df1, df2, 'weighted_deciles', tax_name
        )
    return res
def difference_table(self, calc, groupby, tax_to_diff):
    """
    Return a tax-difference table comparing self (the baseline) with
    calc (the reform) as a Pandas DataFrame.

    The DIFF_VARIABLES results are extracted from both Calculator
    objects and passed to create_difference_table, which groups them
    into the table rows specified by groupby.  This method leaves the
    Calculator objects unchanged.  Note that the returned tables have
    consistent income groups (based on the self expanded_income) even
    though the baseline expanded_income in self and the reform
    expanded_income in calc are different.

    Parameters
    ----------
    calc : Calculator object
        calc represents the reform while self represents the baseline;
        it must have the same current_year and array_len as self

    groupby : String object
        options for input: 'weighted_deciles', 'standard_income_bins'
        determines how the rows in the resulting Pandas DataFrame
        are grouped

    tax_to_diff : String object
        options for input: 'iitax', 'payrolltax', 'combined'
        specifies which tax to difference

    Returns and typical usage
    -------------------------
    diff = calc1.difference_table(calc2, 'weighted_deciles', 'iitax')
    (where calc1 is a baseline Calculator object
    and calc2 is a reform Calculator object).
    The returned diff is a difference table as a Pandas DataFrame
    with groupby rows.
    NOTE: when groupby is 'weighted_deciles', the returned table has
          three extra rows containing top-decile detail consisting of
          statistics for the 0.90-0.95 quantile range (bottom half of
          top decile), for the 0.95-0.99 quantile range, and for the
          0.99-1.00 quantile range (top one percent); and the returned
          table splits the bottom decile into filing units with
          negative (denoted by a 0-10n row label), zero (denoted by a
          0-10z row label), and positive (denoted by a 0-10p row label)
          values of expanded_income.
    """
    # the two Calculator objects must be directly comparable
    assert isinstance(calc, Calculator)
    assert calc.current_year == self.current_year
    assert calc.array_len == self.array_len
    baseline_vars = self.dataframe(DIFF_VARIABLES)
    reform_vars = calc.dataframe(DIFF_VARIABLES)
    return create_difference_table(baseline_vars, reform_vars,
                                   groupby, tax_to_diff)
def xtest_diff_table_sum_row(pit_subsample):
    """
    Check that the last (sum) row of a difference table is the same
    whether rows are grouped by standard income bins or by SOI AGI bins.

    The leading 'x' in the function name keeps pytest from collecting
    this test.
    """
    rec = Records(data=pit_subsample)
    # create a current-law Policy object and Calculator calc1
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator calc2
    reform = {2017: {'_rate2': [0.06]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()
    # extract the difference-table variables once per Calculator
    dv1 = calc1.dataframe(DIFF_VARIABLES)
    dv2 = calc2.dataframe(DIFF_VARIABLES)
    # create two difference tables and compare their content
    tdiff1 = create_difference_table(dv1, dv2,
                                     'standard_income_bins', 'iitax')
    tdiff2 = create_difference_table(dv1, dv2,
                                     'soi_agi_bins', 'iitax')
    non_digit_cols = ['perc_inc', 'perc_cut']
    digit_cols = [c for c in list(tdiff1) if c not in non_digit_cols]
    assert np.allclose(tdiff1[digit_cols][-1:],
                       tdiff2[digit_cols][-1:])
    # the result of this comparison used to be discarded, so the
    # non-digit columns were never actually checked
    assert np.allclose(tdiff1[non_digit_cols][-1:],
                       tdiff2[non_digit_cols][-1:])
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the last (sum) row of a difference table is the same
    whether rows are grouped by standard income bins or by SOI AGI bins.
    """
    rec = Records.cps_constructor(data=cps_subsample)
    # create a current-law Policy object and Calculator calc1
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator calc2
    reform = {'II_rt4': {2013: 0.56}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()
    # extract the difference-table variables once per Calculator
    dv1 = calc1.dataframe(DIFF_VARIABLES)
    dv2 = calc2.dataframe(DIFF_VARIABLES)
    # create two difference tables and compare their content
    tdiff1 = create_difference_table(dv1, dv2,
                                     'standard_income_bins', 'iitax')
    tdiff2 = create_difference_table(dv1, dv2,
                                     'soi_agi_bins', 'iitax')
    non_digit_cols = ['perc_inc', 'perc_cut']
    digit_cols = [c for c in list(tdiff1) if c not in non_digit_cols]
    assert np.allclose(tdiff1[digit_cols][-1:],
                       tdiff2[digit_cols][-1:])
    # the result of this comparison used to be discarded, so the
    # non-digit columns were never actually checked
    assert np.allclose(tdiff1[non_digit_cols][-1:],
                       tdiff2[non_digit_cols][-1:])
def test_create_tables(puf_1991, weights_1991):
    """
    Exercise create_difference_table and create_distribution_table:
    valid arguments must produce DataFrames, while bad arguments or
    Calculator objects that are out of sync must raise ValueError.
    """
    # baseline: current-law policy
    base_policy = Policy()
    base_records = Records(data=puf_1991, weights=weights_1991,
                           start_year=2009)
    calc1 = Calculator(policy=base_policy, records=base_records)
    calc1.calc_all()
    # reform: change one income tax rate starting in 2013
    reform = {2013: {'_II_rt4': [0.56]}}
    reform_policy = Policy()
    reform_policy.implement_reform(reform)
    reform_records = Records(data=puf_1991, weights=weights_1991,
                             start_year=2009)
    calc2 = Calculator(policy=reform_policy, records=reform_records)
    calc2.calc_all()
    # valid groupby values produce a difference table
    for bins in ('large_income_bins', 'webapp_income_bins'):
        diff_table = create_difference_table(calc1.records, calc2.records,
                                             groupby=bins)
        assert isinstance(diff_table, pd.DataFrame)
    # invalid groupby or result_type values are rejected
    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='bad_bins')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  result_type='bad_result_type')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='bad_bins',
                                  result_type='weighted_sum')
    # distribution table of differences relative to the baseline
    dist_table = create_distribution_table(calc2.records,
                                           groupby='small_income_bins',
                                           result_type='weighted_sum',
                                           baseline_obj=calc1.records,
                                           diffs=True)
    assert isinstance(dist_table, pd.DataFrame)
    # after calc1 moves to the next year, tables built from both
    # calc1 and calc2 are expected to raise ValueError
    calc1.increment_year()
    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='large_income_bins')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  result_type='weighted_sum',
                                  baseline_obj=calc1.records,
                                  diffs=True)
def differences_table(self, year, groupby, tax_to_diff):
    """
    Build a differences table from the stored baseline and reform data.

    Parameters
    ----------
    year: year whose baseline and reform data are compared
    groupby: determines how the rows in the table are grouped
        options: 'weighted_deciles', 'standard_income_bins',
        'soi_agi_bins'
    tax_to_diff: which tax to take the difference of
        options: 'iitax', 'payrolltax', 'combined'

    Returns
    -------
    DataFrame containing a differences table
    """
    return create_difference_table(
        self.base_data[year],
        self.reform_data[year],
        groupby,
        tax_to_diff
    )
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    expected values, and check that bad arguments raise ValueError.
    """
    # baseline: current-law policy
    base_policy = Policy()
    base_records = Records.cps_constructor(data=cps_subsample)
    calc1 = Calculator(policy=base_policy, records=base_records)
    calc1.calc_all()
    # reform: change one income tax rate starting in 2013
    reform = {2013: {'_II_rt1': [0.15]}}
    reform_policy = Policy()
    reform_policy.implement_reform(reform)
    reform_records = Records.cps_constructor(data=cps_subsample)
    calc2 = Calculator(policy=reform_policy, records=reform_records)
    calc2.calc_all()

    # difference tables: (groupby, tax_to_diff, expected perc_aftertax)
    diff_cases = [
        ('large_income_bins', 'combined',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%',
          '1.15%', '1.04%', '0.78%', '0.27%', 'n/a']),
        ('webapp_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%',
          '1.15%', '1.04%', '0.78%', '0.30%', '0.08%', '0.07%', 'n/a']),
        ('small_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.02%', '0.16%', '0.64%', '0.82%',
          '0.87%', '0.92%', '1.10%', '1.15%', '1.04%', '0.78%',
          '0.30%', '0.08%', '0.09%', '0.07%', '0.05%', '0.02%',
          '0.00%', 'n/a']),
        ('weighted_deciles', 'combined',
         ['0.00%', '0.02%', '0.35%', '0.79%', '0.89%', '0.97%',
          '1.11%', '1.18%', '0.91%', '0.50%', 'n/a']),
    ]
    for bins, tax_name, expected in diff_cases:
        diff = create_difference_table(calc1.records, calc2.records,
                                       groupby=bins,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax_name)
        assert isinstance(diff, pd.DataFrame)
        assert np.array_equal(diff['perc_aftertax'], expected)
    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='bad_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    # distribution tables: (groupby, [(column, expected values), ...])
    dist_cases = [
        ('weighted_deciles', [
            ('iitax',
             [-8851215, -99666120, -123316561, -85895787, -47357458,
              207462144, 443391189, 978487989, 1709504845, 7631268907,
              10605027933]),
            ('num_returns_ItemDed',
             [1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788,
              131260, 146001, 583832]),
        ]),
        ('webapp_income_bins', [
            ('iitax',
             [-103274, -83144506, -152523834, -129881470, 85802556,
              255480678, 832529135, 1066963515, 3023956558, 2876331264,
              1008672459, 1820944852, 10605027933]),
            ('num_returns_ItemDed',
             [0, 1202, 22654, 31665, 30547, 49851, 124786, 97349,
              160147, 56806, 5803, 3023, 583832]),
        ]),
    ]
    for bins, column_checks in dist_cases:
        dist = create_distribution_table(calc2.records,
                                         groupby=bins,
                                         income_measure='expanded_income',
                                         result_type='weighted_sum')
        assert isinstance(dist, pd.DataFrame)
        for colname, expected in column_checks:
            assert np.allclose(dist[colname].tolist(), expected,
                               atol=0.5, rtol=0.0)

    # a distribution table can also use a copy of expanded_income
    # stored under another name as its income measure
    setattr(calc2.records, 'expanded_income_baseline',
            getattr(calc2.records, 'expanded_income'))
    dist = create_distribution_table(
        calc2.records,
        groupby='webapp_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum'
    )
    assert isinstance(dist, pd.DataFrame)

    # invalid result_type or groupby values are rejected
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  income_measure='expanded_income',
                                  result_type='bad_result_type')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='bad_bins',
                                  income_measure='expanded_income',
                                  result_type='weighted_sum')
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    expected values, and check that bad arguments raise ValueError.
    """
    # pylint: disable=too-many-statements
    recs = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator uses current-law policy
    calc1 = Calculator(policy=policy, records=recs)
    calc1.calc_all()
    # reform Calculator uses the policy after the reform is implemented
    policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=policy, records=recs)
    calc2.calc_all()

    # difference tables grouped by income bins:
    # (groupby, tax_to_diff, expected perc_aftertax)
    binned_diff_cases = [
        ('large_income_bins', 'combined',
         [0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.27,
          np.nan]),
        ('webapp_income_bins', 'iitax',
         [0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.30,
          0.08, 0.07, np.nan]),
        ('small_income_bins', 'iitax',
         [0.00, 0.01, 0.02, 0.16, 0.64, 0.82, 0.87, 0.92, 1.10, 1.15,
          1.04, 0.78, 0.30, 0.08, 0.09, 0.07, 0.05, 0.02, 0.00,
          np.nan]),
    ]
    for bins, tax_name, expected in binned_diff_cases:
        diff = create_difference_table(calc1.records, calc2.records,
                                       groupby=bins,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax_name)
        assert isinstance(diff, pd.DataFrame)
        assert np.allclose(diff['perc_aftertax'], expected,
                           atol=0.005, rtol=0.0, equal_nan=True)

    # difference table grouped by weighted deciles:
    # (column, expected values, absolute tolerance, NaNs compare equal)
    diff = create_difference_table(calc1.records, calc2.records,
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    decile_diff_checks = [
        ('tot_change',
         [14931, 276555, 7728872, 22552703, 34008512, 50233787,
          76811377, 111167087, 123226970, 111414038, 537434832,
          66560891, 39571078, 5282069],
         0.5, False),
        ('share_of_change',
         [0.00, 0.05, 1.44, 4.20, 6.33, 9.35, 14.29, 20.68, 22.93,
          20.73, 100.00, 12.38, 7.36, 0.98],
         0.005, False),
        ('perc_aftertax',
         [0.00, 0.02, 0.35, 0.79, 0.89, 0.97, 1.11, 1.18, 0.91, 0.50,
          np.nan, 0.70, 0.37, 0.06],
         0.005, True),
        ('pc_aftertaxinc',
         [-0.00, -0.02, -0.35, -0.79, -0.89, -0.97, -1.11, -1.18,
          -0.91, -0.50, np.nan, -0.70, -0.37, -0.06],
         0.005, True),
    ]
    for colname, expected, tolerance, nan_ok in decile_diff_checks:
        assert np.allclose(diff[colname], expected,
                           atol=tolerance, rtol=0.0, equal_nan=nan_ok)

    with pytest.raises(ValueError):
        create_difference_table(calc1.records, calc2.records,
                                groupby='bad_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    # distribution tables: (groupby, [(column, expected values), ...])
    dist_cases = [
        ('weighted_deciles', [
            ('iitax',
             [-8851215, -99666120, -123316561, -85895787, -47357458,
              207462144, 443391189, 978487989, 1709504845, 7631268907,
              10605027933, 4171055704, 2751003155, 709210048]),
            ('num_returns_ItemDed',
             [1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788,
              131260, 146001, 583832, 75279, 56819, 13903]),
            ('expanded_income',
             [158456013, 1351981790, 2383726863, 3408544081,
              4569232020, 6321944661, 8520304098, 11817197884,
              17299173380, 41117720202, 96948280992, 21687950798,
              15093608351, 4336161053]),
            ('aftertax_income',
             [147367698, 1354827269, 2351611947, 3192405234,
              4157431713, 5454468907, 7125788590, 9335613303,
              13417244946, 29691084873, 76227844481, 15608893056,
              10854804442, 3227387375]),
        ]),
        ('webapp_income_bins', [
            ('iitax',
             [-103274, -83144506, -152523834, -129881470, 85802556,
              255480678, 832529135, 1066963515, 3023956558, 2876331264,
              1008672459, 1820944852, 10605027933]),
            ('num_returns_ItemDed',
             [0, 1202, 22654, 31665, 30547, 49851, 124786, 97349,
              160147, 56806, 5803, 3023, 583832]),
        ]),
    ]
    for bins, column_checks in dist_cases:
        dist = create_distribution_table(calc2.records,
                                         groupby=bins,
                                         income_measure='expanded_income',
                                         result_type='weighted_sum')
        assert isinstance(dist, pd.DataFrame)
        for colname, expected in column_checks:
            assert np.allclose(dist[colname].tolist(), expected,
                               atol=0.5, rtol=0.0)

    # a distribution table can also use a copy of expanded_income
    # stored under another name as its income measure
    setattr(calc2.records, 'expanded_income_baseline',
            getattr(calc2.records, 'expanded_income'))
    dist = create_distribution_table(
        calc2.records,
        groupby='webapp_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum'
    )
    assert isinstance(dist, pd.DataFrame)

    # invalid result_type or groupby values are rejected
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  income_measure='expanded_income',
                                  result_type='bad_result_type')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='bad_bins',
                                  income_measure='expanded_income',
                                  result_type='weighted_sum')
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    expected values.

    Every column is checked before the test fails, so one run reports
    all mismatches.  For each mismatching column the actual values are
    printed one per line with a trailing comma.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    mismatches = []  # 'label column' entry for every failed comparison

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """Compare table[tabcol] with expected; record and print misses."""
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83,
                -0.81, -0.73, -0.65, -0.18, -0.59]
    check('diff', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83,
                -0.81, -0.73, -0.65, -0.23, -0.09, -0.06, -0.59]
    check('diff', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [np.nan, np.nan, -0.29, -0.07, -0.23, -0.78, -0.66,
                -0.74, -0.70, -0.83, -0.81, -0.73, -0.65, -0.23,
                -0.09, -0.08, -0.07, -0.05, -0.02, np.nan, -0.59]
    check('diff', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [0, 0, 1037894, 16199646, 25518793, 34455230, 49661093,
                62344194, 82290396, 90006817, 117415735, 101818106,
                580747904, 62408600, 33771695, 5637811]
    check('diff', diff, 'tot_change', expected,
          atol=0.51, fmt='{:.0f},')
    expected = [0.00, 0.00, 0.18, 2.79, 4.39, 5.93, 8.55, 10.74,
                14.17, 15.50, 20.22, 17.53, 100.00, 10.75, 5.82, 0.97]
    check('diff', diff, 'share_of_change', expected,
          atol=0.005, fmt='{:.2f},')
    # this column used to be checked twice by two identical blocks
    expected = [np.nan, np.nan, -0.13, -0.65, -0.68, -0.71, -0.79,
                -0.80, -0.82, -0.71, -0.71, -0.30, -0.59, -0.55,
                -0.25, -0.06]
    check('diff', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [0, 0, -54678669, -64005792, -64426464, 32739840,
                207396898, 317535861, 575238615, 984782596,
                1731373913, 7082515174, 10748471972, 1622921432,
                2217477146, 3242116596]
    check('dist', dist, 'iitax', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 2561, 13268, 21368, 28377, 53186, 60433, 79779,
                91010, 117445, 128784, 596211, 63766, 51681, 13337]
    check('dist', dist, 'num_returns_ItemDed', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 836765692, 2661991174, 3978757611, 5306258004,
                7022134388, 8871843614, 11530190180, 14721635194,
                19860290487, 44177752076, 118967618420, 14296456955,
                16895894429, 12985400692]
    check('dist', dist, 'expanded_income', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 821526457, 2483359936, 3714540881, 4821394144,
                6200512981, 7763298300, 9921184240, 12527297334,
                16314596486, 33886371300, 98454082058, 11265497052,
                13416447851, 9204426396]
    check('dist', dist, 'aftertax_income', expected,
          atol=0.5, fmt='{:.0f},')

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [0, 0, -43150804, -77526808, -64845122, 43303823,
                225370761, 723847940, 1098042284, 3264499170,
                2808160213, 950296405, 1820474110, 10748471972]
    check('dist', dist, 'iitax', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 1202, 13614, 27319, 33655, 50186, 116612,
                103896, 181192, 60527, 5126, 2882, 596211]
    check('dist', dist, 'num_returns_ItemDed', expected,
          atol=0.5, fmt='{:.0f},')

    # fail once at the end, naming every column that did not match
    assert not mismatches, (
        'table columns differ from expected: ' + ', '.join(mismatches)
    )
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    expected values.

    Every column is checked before the test fails, so one run reports
    all mismatches.  For each mismatching column the actual values are
    printed one per line with a trailing comma.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    mismatches = []  # 'label column' entry for every failed comparison

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """Compare table[tabcol] with expected; record and print misses."""
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82,
                -0.80, -0.75, -0.65, -0.23, -0.09, -0.06, -0.59]
    check('diff xbin', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [0, 0, 1219678, 15503037, 25922077, 35000592, 48336897,
                62637728, 79750078, 93136108, 116996252, 102458801,
                580961247, 63156380, 33664610, 5637811]
    check('diff xdec', diff, 'tot_change', expected,
          atol=0.51, fmt='{:.0f},')
    expected = [0.00, 0.00, 0.21, 2.67, 4.46, 6.02, 8.32, 10.78,
                13.73, 16.03, 20.14, 17.64, 100.00, 10.87, 5.79, 0.97]
    check('diff xdec', diff, 'share_of_change', expected,
          atol=0.005, fmt='{:.2f},')
    # this column used to be checked twice by two identical blocks
    expected = [np.nan, np.nan, -0.15, -0.62, -0.70, -0.73, -0.78,
                -0.80, -0.80, -0.74, -0.71, -0.30, -0.59, -0.55,
                -0.25, -0.06]
    check('diff xdec', diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    expected = [0, 0, -53644343, -65258622, -57617119, 37391333,
                200879230, 329784586, 553827330, 1015854407,
                1731283600, 7090603505, 10783103907, 1638192777,
                2213960052, 3238450675]
    check('dist xdec', dist, 'iitax', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 2561, 12610, 21936, 29172, 50890, 61563, 78247,
                91823, 118523, 128886, 596211, 63986, 51634, 13266]
    check('dist xdec', dist, 'num_returns_ItemDed', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 835224673, 2639667638, 3940559051, 5286856071,
                6972849344, 8881099529, 11467767759, 14761195525,
                19832126806, 44213000235, 118830346631, 14399218059,
                16868648076, 12945134101]
    check('dist xdec', dist, 'expanded_income', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 818813684, 2466000535, 3671150517, 4790979126,
                6173998985, 7754183496, 9907604744, 12510477225,
                16273592612, 33915377411, 98282178334, 11345456373,
                13400757263, 9169163776]
    check('dist xdec', dist, 'aftertax_income', expected,
          atol=0.5, fmt='{:.0f},')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    expected = [0, 0, -42244205, -76727831, -62581860, 53797887,
                217016689, 723516183, 1108097059, 3272479928,
                2818979541, 950296405, 1820474110, 10783103907]
    check('dist xbin', dist, 'iitax', expected,
          atol=0.5, fmt='{:.0f},')
    expected = [0, 0, 1202, 13614, 27272, 34407, 48265, 117225,
                103319, 181885, 61014, 5126, 2882, 596211]
    check('dist xbin', dist, 'num_returns_ItemDed', expected,
          atol=0.5, fmt='{:.0f},')

    # fail once at the end, naming every column that did not match
    assert not mismatches, (
        'table columns differ from expected: ' + ', '.join(mismatches)
    )
def test_create_tables(cps_subsample):
    """
    Check difference and distribution tables against expected values.

    A current-law Calculator (calc1) and a reform Calculator (calc2, using
    the reform {2013: {'_II_rt1': [0.15]}}) are built from the
    cps_subsample fixture.  Selected columns of the tables created from
    them are compared with hard-coded expected values.  Every column that
    does not match is printed, one value per line, and the test fails only
    after all the comparisons have been made, so a single run reports
    every out-of-date expectation.
    """
    # pylint: disable=too-many-statements,too-many-branches
    mismatches = []

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol;
        on a mismatch remember the column and print its actual values
        formatted with fmt.
        """
        if np.allclose(table[tabcol].tolist(), expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in table[tabcol].values:
            print(fmt.format(val))

    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xbin', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.22, -0.77, -0.80, -0.56, -0.77,
           -0.69, -0.71, -0.67, -0.27, -0.11, -0.06, -0.58],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xdec', diff, 'tot_change',
          [0, 0, 241418460, 2474292614, 2770584237, 2535721686,
           4444363117, 5111483934, 6321945100, 8225913647,
           10597074824, 10234573879, 52957371499, 6137031947,
           3513242382, 584299551],
          atol=0.51, fmt='{:.0f},')
    check('diff xdec', diff, 'share_of_change',
          [0.00, 0.00, 0.46, 4.67, 5.23, 4.79, 8.39, 9.65,
           11.94, 15.53, 20.01, 19.33, 100.00, 11.59, 6.63, 1.10],
          atol=0.005, fmt='{:.2f},')
    # this comparison appeared twice in a row; once is sufficient
    check('diff xdec', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.25, -0.95, -0.73, -0.53, -0.75, -0.71,
           -0.68, -0.72, -0.71, -0.34, -0.58, -0.61, -0.30, -0.07],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    check('dist xdec', dist, 'iitax',
          [0, 0, -1962728575, 1725493747, 4396953820, 6605728718,
           16774691083, 23860454276, 38350836962, 83963523110,
           150930070726, 732859528574, 1057504552440, 151607017873,
           234865455600, 346387055100],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'num_returns_ItemDed',
          [0, 0, 357019, 1448655, 2559613, 2513429, 4419624, 5275374,
           6222375, 7880642, 11147728, 13023015, 54847474, 6118072,
           5478575, 1426368],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'expanded_income',
          [0, 0, 105133510325, 290616204980, 413576297349,
           517828725223, 659857915773, 803218163892, 1042123266101,
           1326558509787, 1805622773921, 4048576203396,
           11013111570748, 1281956155093, 1515893182747,
           1250726865556],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'aftertax_income',
          [0, 0, 97194414402, 259232286042, 375389146785,
           475719938941, 588002960320, 710495874184, 921012587826,
           1129166044052, 1488817328688, 2998268537784,
           9043299119027, 999335257953, 1151930187406, 847003092425],
          atol=0.5, fmt='{:.0f},')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    check('dist xbin', dist, 'iitax',
          [0, 0, -544908512, -27720182, 2473153905, 9469043966,
           17806661306, 56292468689, 88558244888, 298427035609,
           290639143539, 99528466942, 194882962290, 1057504552440],
          atol=0.5, fmt='{:.0f},')
    check('dist xbin', dist, 'num_returns_ItemDed',
          [0, 0, 60455, 1161281, 2323042, 3613216, 4759193, 10006287,
           8785946, 17093586, 6199555, 532925, 311987, 54847474],
          atol=0.5, fmt='{:.0f},')

    assert not mismatches, (
        'unexpected table results: ' + ', '.join(mismatches)
    )
def xtest_create_tables(pit_subsample):
    """
    Compare difference and distribution tables with expected values.

    Builds calc1 (current law) and calc2 (reform {2017: {'_rate2': [0.06]}})
    from the pit_subsample fixture, prints every table column that is not
    close to its expected values, and fails at the end if any column was
    printed.

    NOTE(review): the xtest_ name presumably keeps the test runner from
    collecting this function -- confirm before renaming.
    """
    # pylint: disable=too-many-statements,too-many-branches

    def differs(label, table, tabcol, expected, atol, fmt,
                equal_nan=False, as_list=False):
        """Print the column and return True when it is not as expected."""
        column = table[tabcol]
        actual = column.tolist() if as_list else column.values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return False
        print(label, tabcol)
        for val in column.values:
            print(fmt.format(val))
        return True

    # current-law Calculator
    rec = Records(data=pit_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform Calculator
    reform = {2017: {'_rate2': [0.06]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    outcomes = []

    # difference table by standard income bins
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xbin', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.24, -0.79, -0.81, -0.55, -0.77,
         -0.69, -0.70, -0.67, -0.27, -0.11, -0.06, -0.58],
        0.005, '{:.2f},', equal_nan=True))

    # difference table by weighted deciles
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xdec', diff, 'tot_change',
        [0, 0, 254095629, 2544151690, 2826173357, 2539574809,
         4426339426, 5178198524, 6277367974, 8069273960,
         10572653961, 10269542170, 52957371499, 6188055374,
         3497187245, 584299551],
        0.51, '{:.0f},'))
    outcomes.append(differs(
        'diff xdec', diff, 'share_of_change',
        [0.00, 0.00, 0.48, 4.80, 5.34, 4.80, 8.36, 9.78,
         11.85, 15.24, 19.96, 19.39, 100.00, 11.68, 6.60, 1.10],
        0.005, '{:.2f},'))
    outcomes.append(differs(
        'diff xdec', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.26, -0.96, -0.74, -0.52, -0.75, -0.71,
         -0.68, -0.71, -0.71, -0.34, -0.58, -0.61, -0.30, -0.07],
        0.005, '{:.2f},', equal_nan=True))

    # distribution table by weighted deciles
    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xdec', dist, 'iitax',
        [0, 0, -1818680093, 1971755805, 5010676892, 6746034269,
         17979713134, 26281107130, 35678824858, 82705314943,
         148818900147, 734130905355, 1057504552440, 152370476198,
         234667184101, 347093245055],
        0.5, '{:.0f},'))
    outcomes.append(differs(
        'dist xdec', dist, 'num_returns_ItemDed',
        [0, 0, 357019, 1523252, 2463769, 2571339, 4513934, 5278763,
         6299826, 7713038, 11001450, 13125085, 54847474, 6188702,
         5498415, 1437967],
        0.5, '{:.0f},', as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'expanded_income',
        [0, 0, 105927980655, 294023675202, 417068113194,
         528376852001, 658853731628, 818158430558, 1037838578149,
         1324689584778, 1788101751565, 4047642990187,
         11020681687917, 1286581399758, 1511884268254,
         1249177322176],
        0.5, '{:.0f},', as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'aftertax_income',
        [0, 0, 97735000432, 261911925318, 378049091828,
         485619641327, 586208937799, 722979233740, 919208533243,
         1130705156084, 1473967098213, 2994484618211,
         9050869236196, 1002450732282, 1147596725957, 844437159972],
        0.5, '{:.0f},', as_list=True))

    # distribution table by standard income bins
    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xbin', dist, 'iitax',
        [0, 0, -544908512, -140959365, 3354006293, 9339571323,
         18473567840, 55206201916, 89276157367, 297973932010,
         290155554334, 99528466942, 194882962290, 1057504552440],
        0.5, '{:.0f},'))
    outcomes.append(differs(
        'dist xbin', dist, 'num_returns_ItemDed',
        [0, 0, 60455, 1107780, 2366845, 3460607, 4837397, 10090391,
         8850784, 17041135, 6187168, 532925, 311987, 54847474],
        0.5, '{:.0f},', as_list=True))

    assert not any(outcomes)
def test_create_tables(cps_subsample):
    """
    Check difference and distribution tables against expected values.

    A current-law Calculator (calc1) and a reform Calculator (calc2, using
    the reform {2013: {'_II_rt1': [0.15]}}) are built from the
    cps_subsample fixture.  Difference tables are created with
    create_difference_table and distribution tables with
    create_distribution_table, always using expanded_income as the income
    measure.  Every column that does not match its hard-coded expected
    values is printed, one value per line, and the test fails only after
    all the comparisons have been made.
    """
    # pylint: disable=too-many-statements,too-many-branches
    mismatches = []

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol;
        on a mismatch remember the column and print its actual values
        formatted with fmt.
        """
        if np.allclose(table[tabcol].tolist(), expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in table[tabcol].values:
            print(fmt.format(val))

    def difference_table(groupby, tax_to_diff):
        """Return the calc1-versus-calc2 difference table for groupby."""
        table = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                        calc2.dataframe(DIFF_VARIABLES),
                                        groupby=groupby,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax_to_diff)
        assert isinstance(table, pd.DataFrame)
        return table

    def distribution_table(groupby):
        """Return the calc2 weighted-sum distribution table for groupby."""
        table = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                          groupby=groupby,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        return table

    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # test creating various difference tables

    diff = difference_table('large_income_bins', 'combined')
    check('diff', diff, 'pc_aftertaxinc',
          [0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74,
           -0.65, -0.18, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('standard_income_bins', 'iitax')
    check('diff', diff, 'pc_aftertaxinc',
          [0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74,
           -0.65, -0.23, -0.09, -0.06, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('small_income_bins', 'iitax')
    check('diff', diff, 'pc_aftertaxinc',
          [0.00, -0.29, -0.07, -0.22, -0.80, -0.65, -0.74, -0.71,
           -0.81, -0.83, -0.74, -0.65, -0.23, -0.09, -0.08, -0.07,
           -0.05, -0.02, 0.00, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = difference_table('weighted_deciles', 'combined')
    check('diff', diff, 'tot_change',
          [0, 855188, 15425829, 26212078, 33369237, 50208703,
           63312937, 82312360, 90711899, 117518598, 101779164,
           581705993, 62142547, 33919755, 5716862],
          atol=0.51, fmt='{:.0f},')
    check('diff', diff, 'share_of_change',
          [0.00, 0.15, 2.65, 4.51, 5.74, 8.63, 10.88, 14.15,
           15.59, 20.20, 17.50, 100.00, 10.68, 5.83, 0.98],
          atol=0.005, fmt='{:.2f},')
    # this comparison appeared twice in a row; once is sufficient
    check('diff', diff, 'pc_aftertaxinc',
          [0.00, -0.11, -0.62, -0.71, -0.69, -0.81, -0.82, -0.82,
           -0.72, -0.71, -0.30, -0.59, -0.55, -0.25, -0.06],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = distribution_table('weighted_deciles')
    check('dist', dist, 'iitax',
          [0, -56140397, -67237556, -58897159, 17222017, 212673684,
           328116256, 573255089, 992965515, 1730626734, 7142993526,
           10815577709, 1625179635, 2241659962, 3276153930],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 1202, 13981, 21932, 27445, 52318, 62509, 79749, 91861,
           117068, 129463, 597527, 63940, 52137, 13387],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'expanded_income',
          [0, 812766585, 2639118220, 3940557055, 5243088362,
           6988752253, 8827238879, 11605062543, 14729565181,
           19894042635, 44374875397, 119055067109, 14255277238,
           17039539254, 13080058905],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'aftertax_income',
          [0, 801755209, 2466382489, 3674186760, 4779876836,
           6150380331, 7701226391, 10000914935, 12515316309,
           16352910962, 34006973974, 98449924197, 11219604941,
           13525917494, 9261451538],
          atol=0.5, fmt='{:.0f},')

    dist = distribution_table('standard_income_bins')
    check('dist', dist, 'iitax',
          [0, -44670465, -79534586, -61791623, 34666275, 216487136,
           742113595, 1099657851, 3270948526, 2826393721, 962881064,
           1848426216, 10815577709],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 1202, 13625, 27355, 33694, 50236, 116751, 104035,
           181572, 60936, 5196, 2924, 597527],
          atol=0.5, fmt='{:.0f},')

    assert not mismatches, (
        'unexpected table results: ' + ', '.join(mismatches)
    )
def test_create_tables(cps_subsample):
    """
    Compare difference and distribution tables with expected values.

    Builds calc1 (current law) and calc2 (reform {'II_rt1': {2013: 0.15}})
    from the cps_subsample fixture, prints every table column that is not
    within 0.1 of its expected values, and fails at the end if any column
    was printed.
    """
    # pylint: disable=too-many-statements,too-many-branches

    def differs(label, table, tabcol, expected,
                equal_nan=False, as_list=False):
        """Print the column and return True when it is not as expected."""
        column = table[tabcol]
        actual = column.tolist() if as_list else column.values
        if np.allclose(actual, expected,
                       atol=0.1, rtol=0.0, equal_nan=equal_nan):
            return False
        print(label, tabcol)
        for val in column.values:
            print('{:.1f},'.format(val))
        return True

    # current-law Calculator
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform Calculator
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    outcomes = []

    # difference table by standard income bins
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xbin', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.2, -0.8, -0.8, -0.5, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.1, -0.1, -0.6],
        equal_nan=True))

    # difference table by weighted deciles
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xdec', diff, 'tot_change',
        [0.0, 0.0, 0.3, 2.5, 2.8, 2.5, 4.4, 5.2,
         6.3, 8.1, 10.6, 10.3, 53.0, 6.2, 3.5, 0.6]))
    outcomes.append(differs(
        'diff xdec', diff, 'share_of_change',
        [0.0, 0.0, 0.5, 4.8, 5.3, 4.8, 8.3, 9.9,
         11.8, 15.2, 19.9, 19.4, 100.0, 11.7, 6.6, 1.1]))
    outcomes.append(differs(
        'diff xdec', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.3, -1.0, -0.7, -0.5, -0.7, -0.7,
         -0.7, -0.7, -0.7, -0.3, -0.6, -0.6, -0.3, -0.1],
        equal_nan=True))

    # distribution table by weighted deciles
    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xdec', dist, 'iitax',
        [0.0, 0.0, -1.8, 2.0, 5.0, 6.7, 18.0, 26.1,
         35.2, 82.7, 148.8, 734.1, 1056.9, 152.4, 234.7, 347.1]))
    outcomes.append(differs(
        'dist xdec', dist, 'count_ItemDed',
        [0.0, 0.0, 0.4, 1.5, 2.5, 2.6, 4.5, 5.3,
         6.3, 7.7, 11.0, 13.1, 54.9, 6.2, 5.5, 1.4],
        as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'expanded_income',
        [0.0, 0.0, 105.9, 294.0, 417.1, 528.4, 658.9, 818.2,
         1037.8, 1324.7, 1788.1, 4047.6, 11020.7, 1286.6, 1511.9,
         1249.2],
        as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'aftertax_income',
        [0.0, 0.0, 97.7, 261.9, 378.1, 485.6, 586.2, 723.2,
         919.6, 1130.7, 1474.0, 2994.5, 9051.5, 1002.5, 1147.6,
         844.4],
        as_list=True))

    # distribution table by standard income bins
    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xbin', dist, 'iitax',
        [0.0, 0.0, -0.5, -0.1, 3.4, 9.3, 18.5,
         54.6, 89.3, 298.0, 290.2, 99.5, 194.9, 1056.9]))
    outcomes.append(differs(
        'dist xbin', dist, 'count_ItemDed',
        [0.0, 0.0, 0.1, 1.1, 2.4, 3.5, 4.8,
         10.1, 8.8, 17.0, 6.2, 0.5, 0.3, 54.9],
        as_list=True))

    assert not any(outcomes)
def test_create_tables(cps_subsample):
    """
    Compare difference and distribution tables with expected values.

    Builds calc1 (current law) and calc2 (reform {'II_rt1': {2013: 0.15}})
    from the cps_subsample fixture, prints every table column that is not
    within 0.1 of its expected values, and fails at the end if any column
    was printed.  Columns are cast to float before the comparison unless
    they are compared as plain lists.
    """
    # pylint: disable=too-many-statements,too-many-branches

    def differs(label, table, tabcol, expected,
                equal_nan=False, as_list=False):
        """Print the column and return True when it is not as expected."""
        column = table[tabcol]
        if as_list:
            actual = column.tolist()
        else:
            actual = column.values.astype('float')
        if np.allclose(actual, expected,
                       atol=0.1, rtol=0.0, equal_nan=equal_nan):
            return False
        print(label, tabcol)
        for val in column.values:
            print('{:.1f},'.format(val))
        return True

    # current-law Calculator
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform Calculator
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    outcomes = []

    # difference table by standard income bins
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xbin', diff, 'pc_aftertaxinc',
        [0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.1, -0.0, -0.6],
        equal_nan=True))

    # difference table by weighted deciles
    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    outcomes.append(differs(
        'diff xdec', diff, 'tot_change',
        [0.0, 0.0, 0.0, 0.6, 2.9, 3.5, 4.4, 6.1,
         6.5, 8.7, 12.0, 13.3, 58.0, 7.7, 4.8, 0.8]))
    outcomes.append(differs(
        'diff xdec', diff, 'share_of_change',
        [0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 7.6, 10.6,
         11.1, 15.1, 20.7, 22.9, 100.0, 13.2, 8.3, 1.4]))
    outcomes.append(differs(
        'diff xdec', diff, 'pc_aftertaxinc',
        [np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.6, -0.7, -0.4, -0.1],
        equal_nan=True))

    # distribution table by weighted deciles
    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xdec', dist, 'iitax',
        [0.0, 0.0, -0.4, -4.1, -5.9, 8.0, 16.9, 29.0,
         27.0, 71.4, 153.4, 910.1, 1205.5, 159.4, 268.1, 482.7]))
    outcomes.append(differs(
        'dist xdec', dist, 'count_ItemDed',
        [0.0, 0.0, 0.0, 1.1, 2.6, 3.9, 4.7, 6.3,
         6.5, 7.4, 11.3, 16.3, 60.3, 7.4, 7.2, 1.7],
        as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'expanded_income',
        [0.0, -1.4, 30.7, 209.8, 388.8, 541.2, 679.1, 847.6,
         1097.1, 1430.7, 1978.3, 5007.6, 12209.4, 1410.9, 1765.5,
         1831.2],
        as_list=True))
    outcomes.append(differs(
        'dist xdec', dist, 'aftertax_income',
        [0.0, -1.4, 29.0, 195.5, 363.0, 491.0, 612.2, 747.1,
         980.6, 1248.0, 1630.2, 3741.3, 10036.6, 1100.9, 1339.0,
         1301.4],
        as_list=True))

    # distribution table by standard income bins
    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    outcomes.append(differs(
        'dist xbin', dist, 'iitax',
        [0.0, 0.0, -1.3, -7.6, -1.2, 20.7, 26.3,
         47.2, 95.5, 321.9, 324.0, 64.8, 315.2, 1205.5]))
    outcomes.append(differs(
        'dist xbin', dist, 'count_ItemDed',
        [0.0, 0.0, 0.2, 1.8, 3.6, 5.9, 5.7,
         10.2, 8.1, 17.7, 6.7, 0.3, 0.1, 60.3],
        as_list=True))

    assert not any(outcomes)
def test_create_tables(cps_subsample):
    """
    Verify selected difference-table and distribution-table columns.

    Two Calculator objects are built from the cps_subsample fixture: calc1
    under current law and calc2 under the reform {'II_rt1': {2013: 0.15}}.
    Each checked column must be within 0.1 of its expected values; columns
    that are not are printed one value per line, and the test fails after
    the last comparison if anything was printed.
    """
    # pylint: disable=too-many-statements,too-many-branches

    def is_off(tag, frame, tabcol, expected, equal_nan=False, as_list=False):
        """Return True, after printing the column, on a mismatch."""
        series = frame[tabcol]
        observed = series.tolist() if as_list else series.values
        matches = np.allclose(observed, expected,
                              atol=0.1, rtol=0.0, equal_nan=equal_nan)
        if not matches:
            print(tag, tabcol)
            for val in series.values:
                print('{:.1f},'.format(val))
        return not matches

    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    test_failure = False

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    test_failure |= is_off(
        'diff xbin', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.2, -0.8, -0.8, -0.5, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.1, -0.1, -0.6],
        equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    test_failure |= is_off(
        'diff xdec', diff, 'tot_change',
        [0.0, 0.0, 0.3, 2.5, 2.8, 2.5, 4.4, 5.2,
         6.3, 8.1, 10.6, 10.3, 53.0, 6.2, 3.5, 0.6])
    test_failure |= is_off(
        'diff xdec', diff, 'share_of_change',
        [0.0, 0.0, 0.5, 4.8, 5.3, 4.8, 8.3, 9.9,
         11.8, 15.2, 19.9, 19.4, 100.0, 11.7, 6.6, 1.1])
    test_failure |= is_off(
        'diff xdec', diff, 'pc_aftertaxinc',
        [np.nan, np.nan, -0.3, -1.0, -0.7, -0.5, -0.7, -0.7,
         -0.7, -0.7, -0.7, -0.3, -0.6, -0.6, -0.3, -0.1],
        equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    test_failure |= is_off(
        'dist xdec', dist, 'iitax',
        [0.0, 0.0, -1.8, 2.0, 5.0, 6.7, 18.0, 26.1,
         35.2, 82.7, 148.8, 734.1, 1056.9, 152.4, 234.7, 347.1])
    test_failure |= is_off(
        'dist xdec', dist, 'count_ItemDed',
        [0.0, 0.0, 0.4, 1.5, 2.5, 2.6, 4.5, 5.3,
         6.3, 7.7, 11.0, 13.1, 54.9, 6.2, 5.5, 1.4],
        as_list=True)
    test_failure |= is_off(
        'dist xdec', dist, 'expanded_income',
        [0.0, 0.0, 105.9, 294.0, 417.1, 528.4, 658.9, 818.2,
         1037.8, 1324.7, 1788.1, 4047.6, 11020.7, 1286.6, 1511.9,
         1249.2],
        as_list=True)
    test_failure |= is_off(
        'dist xdec', dist, 'aftertax_income',
        [0.0, 0.0, 97.7, 261.9, 378.1, 485.6, 586.2, 723.2,
         919.6, 1130.7, 1474.0, 2994.5, 9051.5, 1002.5, 1147.6,
         844.4],
        as_list=True)

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    test_failure |= is_off(
        'dist xbin', dist, 'iitax',
        [0.0, 0.0, -0.5, -0.1, 3.4, 9.3, 18.5,
         54.6, 89.3, 298.0, 290.2, 99.5, 194.9, 1056.9])
    test_failure |= is_off(
        'dist xbin', dist, 'count_ItemDed',
        [0.0, 0.0, 0.1, 1.1, 2.4, 3.5, 4.8,
         10.1, 8.8, 17.0, 6.2, 0.5, 0.3, 54.9],
        as_list=True)

    assert not test_failure
def test_create_tables(cps_subsample):
    """
    Verify selected difference-table and distribution-table columns.

    Two Calculator objects are built from the cps_subsample fixture: calc1
    under current law and calc2 under the reform {'II_rt1': {2013: 0.15}}.
    Each checked column must be within 0.1 of its expected values; columns
    that are not are printed one value per line, and the test fails after
    the last comparison if anything was printed.  Columns are cast to
    float before the comparison unless they are compared as plain lists.
    """
    # pylint: disable=too-many-statements,too-many-branches

    def is_off(tag, frame, tabcol, expected, equal_nan=False, as_list=False):
        """Return True, after printing the column, on a mismatch."""
        series = frame[tabcol]
        if as_list:
            observed = series.tolist()
        else:
            observed = series.values.astype('float')
        matches = np.allclose(observed, expected,
                              atol=0.1, rtol=0.0, equal_nan=equal_nan)
        if not matches:
            print(tag, tabcol)
            for val in series.values:
                print('{:.1f},'.format(val))
        return not matches

    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    test_failure = False

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    test_failure |= is_off(
        'diff xbin', diff, 'pc_aftertaxinc',
        [0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.1, -0.0, -0.6],
        equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    test_failure |= is_off(
        'diff xdec', diff, 'tot_change',
        [0.0, 0.0, 0.0, 0.6, 2.7, 3.4, 4.2, 5.8,
         6.3, 8.1, 11.5, 12.7, 55.2, 7.2, 4.7, 0.8])
    test_failure |= is_off(
        'diff xdec', diff, 'share_of_change',
        [0.0, 0.0, 0.0, 1.0, 4.9, 6.1, 7.6, 10.5,
         11.4, 14.7, 20.7, 23.0, 100.0, 13.0, 8.5, 1.5])
    test_failure |= is_off(
        'diff xdec', diff, 'pc_aftertaxinc',
        [np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8,
         -0.7, -0.7, -0.7, -0.3, -0.6, -0.7, -0.4, -0.1],
        equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    test_failure |= is_off(
        'dist xdec', dist, 'iitax',
        [0.0, 0.0, -0.2, -3.5, -5.8, 9.0, 16.6, 28.6,
         30.3, 70.2, 153.0, 893.7, 1191.8, 145.6, 269.7, 478.4])
    test_failure |= is_off(
        'dist xdec', dist, 'count_ItemDed',
        [0.0, 0.0, 0.0, 1.0, 2.9, 4.2, 4.5, 6.0,
         6.0, 7.5, 11.3, 15.6, 59.0, 6.7, 7.2, 1.8],
        as_list=True)
    test_failure |= is_off(
        'dist xdec', dist, 'expanded_income',
        [0.0, -1.4, 28.1, 201.5, 377.6, 536.2, 662.2, 841.1,
         1053.0, 1400.7, 1923.1, 4956.7, 11978.6, 1374.6, 1743.5,
         1838.6],
        as_list=True)
    test_failure |= is_off(
        'dist xdec', dist, 'aftertax_income',
        [0.0, -1.4, 26.3, 187.5, 353.1, 483.4, 596.6, 743.0,
         934.6, 1221.8, 1579.5, 3721.8, 9846.3, 1096.0, 1317.8,
         1308.0],
        as_list=True)

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    test_failure |= is_off(
        'dist xbin', dist, 'iitax',
        [0.0, 0.0, -1.2, -7.0, 0.2, 23.4, 26.9,
         52.7, 95.5, 305.5, 321.1, 58.6, 316.2, 1191.8])
    test_failure |= is_off(
        'dist xbin', dist, 'count_ItemDed',
        [0.0, 0.0, 0.2, 2.1, 3.5, 6.4, 5.8,
         9.9, 7.8, 16.4, 6.5, 0.4, 0.1, 59.0],
        as_list=True)

    assert not test_failure
def summary(df1, df2, mask):
    """
    Assemble the dictionary of summary-results DataFrames.

    df1 contains raw results for baseline plan
    df2 contains raw results for reform plan
    mask is the boolean array specifying records with reform-induced tax diffs

    The returned dictionary has these keys:
      aggr_d, aggr_1, aggr_2
        s006-weighted totals of iitax, payrolltax and combined for
        reform-minus-baseline, baseline and reform, indexed by AGGR_ROW_NAMES
      diff_{itax,ptax,comb}_{xdec,xbin}
        difference tables for each tax, grouped by weighted deciles (xdec)
        or by standard income bins (xbin)
      dist{1,2}_{xdec,xbin}
        distribution tables for baseline (1) and reform (2); the reform
        tables are grouped using the baseline expanded_income values
    Every xbin table has its first row (the negative-income bin) removed.
    """
    # pylint: disable=too-many-statements,too-many-locals
    df2 = create_results_columns(df1, df2, mask)
    summ = {}

    # (tax variable name, abbreviation used in the summary-dictionary keys)
    taxes = (
        ('iitax', 'itax'),
        ('payrolltax', 'ptax'),
        ('combined', 'comb'),
    )
    # (column/key suffix, groupby kind, whether to remove the first table row)
    groupings = (
        ('_xdec', 'weighted_deciles', False),
        ('_xbin', 'standard_income_bins', True),
    )

    # tax difference totals between reform and baseline
    summ['aggr_d'] = pd.DataFrame(
        data=[((df2[tax + '_agg'] - df1[tax]) * df2['s006']).sum()
              for tax, _ in taxes],
        index=AGGR_ROW_NAMES
    )
    # totals for baseline
    summ['aggr_1'] = pd.DataFrame(
        data=[(df1[tax] * df1['s006']).sum() for tax, _ in taxes],
        index=AGGR_ROW_NAMES
    )
    # totals for reform
    summ['aggr_2'] = pd.DataFrame(
        data=[(df2[tax + '_agg'] * df2['s006']).sum() for tax, _ in taxes],
        index=AGGR_ROW_NAMES
    )

    # create difference tables: all xdec tables first, then all xbin tables
    for suffix, groupby, drop_first_row in groupings:
        for tax, abbrev in taxes:
            # the difference table reads the reform tax from the plain column
            # name, so copy the grouping-specific results into it first
            df2[tax] = df2[tax + suffix]
            diff_table = create_difference_table(
                df1, df2,
                groupby=groupby,
                income_measure='expanded_income',
                tax_to_diff=tax
            )
            if drop_first_row:
                diff_table.drop(diff_table.index[0], inplace=True)
            summ['diff_' + abbrev + suffix] = diff_table

    # create distribution tables: baseline then reform for each grouping
    for suffix, groupby, drop_first_row in groupings:
        dist_base = create_distribution_table(
            df1,
            groupby=groupby,
            income_measure='expanded_income',
            result_type='weighted_sum'
        )
        if drop_first_row:
            dist_base.drop(dist_base.index[0], inplace=True)
        summ['dist1' + suffix] = dist_base

        # snapshot the suffixed column names before adding columns to df2,
        # then copy each one into its un-suffixed root column
        suffixed_cols = [name for name in list(df2) if name.endswith(suffix)]
        for name in suffixed_cols:
            df2[name.replace(suffix, '')] = df2[name]
        # reform results are grouped by the baseline income measure
        df2['expanded_income_baseline'] = df1['expanded_income']
        dist_reform = create_distribution_table(
            df2,
            groupby=groupby,
            income_measure='expanded_income_baseline',
            result_type='weighted_sum'
        )
        if drop_first_row:
            dist_reform.drop(dist_reform.index[0], inplace=True)
        summ['dist2' + suffix] = dist_reform

    # return dictionary of summary results
    return summ