def test_agg(tests_path, puf_fullsample):
    """
    Check aggregate taxes computed from puf.csv under the 2017_law.json
    baseline policy.

    First the full-sample diagnostic table is compared with the contents of
    pufcsv_agg_expect.txt (any non-small difference writes the actual table
    to pufcsv_agg_actual.txt and raises ValueError).  Then a fixed-seed
    sub-sample is used to recompute the table, and the combined liability in
    each year must be within a relative tolerance of the full-sample value.
    """
    # pylint: disable=too-many-locals,too-many-statements
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline policy is a default Policy with the 2017_law.json reform
    law_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    law_params = Calculator.read_json_param_objects(law_path, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(law_params['policy'])
    # full-sample Calculator and its aggregate diagnostic table
    full_records = Records(data=puf_fullsample)
    full_calc = Calculator(policy=baseline_policy, records=full_records)
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    full_taxes = full_table.loc[liability_row]
    # render the table as text that ends with exactly one EOL character
    table_text = full_table.to_string(float_format='%8.1f') + '\n'
    actual_lines = table_text.splitlines(True)
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        raw_expect = expect_file.read()
    # trailing whitespace in the expected file is normalized to one EOL
    expect_lines = (raw_expect.rstrip('\n\t ') + '\n').splitlines(True)
    # only differences larger than a small value count as failures
    if nonsmall_diffs(actual_lines, expect_lines):
        # aggres path ends in 'expect.txt' (10 chars); swap in 'actual.txt'
        actual_path = '{}{}'.format(expect_path[:-10], 'actual.txt')
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(''.join([
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n',
            '-------------------------------------------------\n',
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n',
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n',
            '--- pufcsv_agg_expect.txt ---\n',
            '--- and rerun test. ---\n',
            '-------------------------------------------------\n',
        ]))
    # recompute the table using an unweighted sub-sample of the records
    seed = 2222  # fixed so the sub-sample is always the same
    fraction = 0.05  # sub-sample fraction
    sub_data = puf_fullsample.sample(frac=fraction, random_state=seed)
    sub_calc = Calculator(policy=baseline_policy,
                          records=Records(data=sub_data))
    sub_taxes = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect one report per year whose sub-vs-full difference is too large
    rel_tol = 0.01  # maximum allowed relative difference in tax liability
    reports = []
    for year in range(first_year, first_year + num_years):
        sub_tax = sub_taxes[year]
        full_tax = full_taxes[year]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=rel_tol):
            continue
        rel_diff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(year)
        )
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                fraction, rel_tol, seed)
        )
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, rel_diff)
        )
    if reports:
        raise ValueError(''.join(reports))
def test_create_tables(cps_subsample):
    """
    Build difference and distribution tables from a current-law Calculator
    and a reform Calculator (2013 '_II_rt1' set to 0.15) and compare selected
    table columns with hard-coded expected values.
    """
    # pylint: disable=too-many-statements
    # current-law Calculator (calc1)
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # policy-reform Calculator (calc2) built from the same Policy object
    pol.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    def difference_table(groupby, tax_to_diff):
        """Return the calc1-vs-calc2 difference table for groupby."""
        table = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                        calc2.dataframe(DIFF_VARIABLES),
                                        groupby=groupby,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax_to_diff)
        assert isinstance(table, pd.DataFrame)
        return table

    def distribution_table(groupby):
        """Return the calc2 weighted-sum distribution table for groupby."""
        table = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                          groupby=groupby,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        return table

    # difference tables by income bins: check only perc_aftertax column
    bin_cases = [
        ('large_income_bins', 'combined',
         [0.00, 0.01, 0.41, 0.76, 0.85, 1.06, 1.14, 1.04, 0.76, 0.19,
          0.70]),
        ('webapp_income_bins', 'iitax',
         [0.00, 0.01, 0.41, 0.76, 0.85, 1.06, 1.14, 1.04, 0.76, 0.26,
          0.08, 0.06, 0.70]),
        ('small_income_bins', 'iitax',
         [0.00, 0.01, 0.02, 0.15, 0.58, 0.73, 0.78, 0.85, 1.06, 1.14,
          1.04, 0.76, 0.26, 0.08, 0.08, 0.07, 0.04, 0.02, np.nan, 0.70]),
    ]
    for groupby, tax_to_diff, expected in bin_cases:
        diff = difference_table(groupby, tax_to_diff)
        assert np.allclose(diff['perc_aftertax'].values, expected,
                           atol=0.005, rtol=0.0, equal_nan=True)

    # difference table by weighted deciles: check four columns
    diff = difference_table('weighted_deciles', 'combined')
    decile_cases = [
        ('tot_change', 0.5, False,
         [14931, 276555, 7728872, 22552703, 34008512, 50233787,
          76811377, 111167087, 123226970, 111414038, 537434832,
          66560891, 39571078, 5282069]),
        ('share_of_change', 0.005, False,
         [0.00, 0.05, 1.44, 4.20, 6.33, 9.35, 14.29, 20.68, 22.93,
          20.73, 100.00, 12.38, 7.36, 0.98]),
        ('perc_aftertax', 0.005, True,
         [0.01, 0.02, 0.33, 0.70, 0.81, 0.91, 1.07, 1.18, 0.91, 0.37,
          0.70, 0.69, 0.34, 0.06]),
        ('pc_aftertaxinc', 0.005, True,
         [-0.01, -0.02, -0.33, -0.70, -0.81, -0.91, -1.07, -1.18, -0.91,
          -0.37, -0.70, -0.69, -0.34, -0.06]),
    ]
    for column, abs_tol, nan_ok, expected in decile_cases:
        assert np.allclose(diff[column].values, expected,
                           atol=abs_tol, rtol=0.0, equal_nan=nan_ok)

    # distribution table by weighted deciles
    dist = distribution_table('weighted_deciles')
    expected = [-8851215, -99666120, -123316561, -85895787, -47357458,
                207462144, 443391189, 978487989, 1709504845, 7631268907,
                10605027933, 1655597977, 2537684742, 3437986189]
    assert np.allclose(dist['iitax'].values, expected,
                       atol=0.5, rtol=0.0)
    decile_list_cases = [
        ('num_returns_ItemDed',
         [1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788, 131260,
          146001, 583832, 70258, 59834, 15909]),
        ('expanded_income',
         [158456013, 1351981790, 2383726863, 3408544081, 4569232020,
          6321944661, 8520304098, 11817197884, 17299173380, 41117720202,
          96948280992, 12723790026, 15769741079, 12624189098]),
        ('aftertax_income',
         [147367698, 1354827269, 2351611947, 3192405234, 4157431713,
          5454468907, 7125788590, 9335613303, 13417244946, 29691084873,
          76227844481, 9546216325, 11603328920, 8541539628]),
    ]
    for column, expected in decile_list_cases:
        assert np.allclose(dist[column].tolist(), expected,
                           atol=0.5, rtol=0.0)

    # distribution table by webapp income bins
    dist = distribution_table('webapp_income_bins')
    expected = [-103274, -83144506, -152523834, -129881470, 85802556,
                255480678, 832529135, 1066963515, 3023956558, 2876331264,
                1008672459, 1820944852, 10605027933]
    assert np.allclose(dist['iitax'], expected,
                       atol=0.5, rtol=0.0)
    expected = [0, 1202, 22654, 31665, 30547, 49851, 124786, 97349,
                160147, 56806, 5803, 3023, 583832]
    assert np.allclose(dist['num_returns_ItemDed'].tolist(), expected,
                       atol=0.5, rtol=0.0)
def test_make_calculator_deepcopy(cps_subsample):
    """
    Check that copy.deepcopy of a Calculator returns a Calculator.
    """
    original = Calculator(
        policy=Policy(),
        records=Records.cps_constructor(data=cps_subsample)
    )
    clone = copy.deepcopy(original)
    assert isinstance(clone, Calculator)
def calculate(year_n, start_year,
              use_puf_not_cps,
              use_full_sample,
              user_mods,
              behavior_allowed):
    """
    Build and calculate pre-reform and post-reform Calculator objects.

    The user_mods argument is assumed to be a dictionary of the kind
    returned by the Calculator.read_json_param_objects() function; it is
    checked with check_user_mods() and its 'consumption',
    'growdiff_baseline', 'growdiff_response', 'behavior' and 'policy'
    entries are used below.

    Returns the tuple (calc1, calc2, mask) where
      calc1 is the pre-reform Calculator object,
      calc2 is the post-reform Calculator object, and
      mask marks records whose iitax changes when one dollar is added to
           wage and salary income (puf input) or is an all-zero int8 array
           with one element per record (cps input).
    Both Calculator objects are first advanced to start_year and then
    incremented by year_n further years before the final calculation.

    Raises ValueError if both behavior and growdiff_response have a
    response, or if behavior has a response while behavior_allowed is False
    (use False for static results and True for dynamic results).
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements

    check_user_mods(user_mods)

    # Consumption instance
    consump = Consumption()
    consump.update_consumption(user_mods['consumption'])

    # baseline and response Growdiff instances
    growdiff_baseline = Growdiff()
    growdiff_response = Growdiff()
    base_assumps = user_mods['growdiff_baseline']
    resp_assumps = user_mods['growdiff_response']
    growdiff_baseline.update_growdiff(base_assumps)
    growdiff_response.update_growdiff(resp_assumps)

    # pre-reform growfactors get only the baseline growdiff;
    # post-reform growfactors get baseline and then response growdiff
    growfactors_pre = Growfactors()
    growdiff_baseline.apply_to(growfactors_pre)
    growfactors_post = Growfactors()
    growdiff_baseline.apply_to(growfactors_post)
    growdiff_response.apply_to(growfactors_post)

    # read input file and apply sampling scheme
    read_started = time.time()
    here = os.path.abspath(os.path.dirname(__file__))
    if use_puf_not_cps:
        sampling_frac = 0.05
        sampling_seed = 180
        # first try TaxBrain deployment path
        input_path = 'puf.csv.gz'
        if not os.path.isfile(input_path):
            # otherwise try local Tax-Calculator deployment path
            input_path = os.path.join(here, '..', '..', 'puf.csv')
    else:
        sampling_frac = 0.03
        sampling_seed = 180
        # first try Tax-Calculator code path
        input_path = os.path.join(here, '..', 'cps.csv.gz')
        if not os.path.isfile(input_path):
            # otherwise read from taxcalc package "egg"
            input_path = None  # pragma: no cover
            full_sample = read_egg_csv('cps.csv.gz')  # pragma: no cover
    if input_path:
        full_sample = pd.read_csv(input_path)
    if use_full_sample:
        sample = full_sample
    else:
        sample = full_sample.sample(  # pylint: disable=no-member
            frac=sampling_frac,
            random_state=sampling_seed
        )
    timing_fmt = ('puf-read-time= {:.1f}' if use_puf_not_cps
                  else 'cps-read-time= {:.1f}')
    print(timing_fmt.format(time.time() - read_started))

    def make_records(gfactors):
        """Return Records built from a deep copy of sample."""
        data = copy.deepcopy(sample)
        if use_puf_not_cps:
            return Records(data=data, gfactors=gfactors)
        return Records.cps_constructor(data=data, gfactors=gfactors)

    def calc_at_start_year(calc):
        """Advance calc to start_year and calculate it in place."""
        while calc.current_year < start_year:
            calc.increment_year()
        calc.calc_all()
        assert calc.current_year == start_year

    # pre-reform Calculator instance
    recs1 = make_records(growfactors_pre)
    policy1 = Policy(gfactors=growfactors_pre)
    calc1 = Calculator(policy=policy1, records=recs1, consumption=consump)
    calc_at_start_year(calc1)

    # mask array
    res1 = calc1.dataframe(DIST_VARIABLES)
    if not use_puf_not_cps:
        # cps input: all-zero mask
        # (original comment: no fuzzing of reform results is required)
        mask = np.zeros(res1.shape[0], dtype=np.int8)
    else:
        # second pre-reform Calculator whose records have one extra dollar
        # of wage and salary income, used to find which filing units
        # undergo a resulting change in tax liability
        recs1p = make_records(growfactors_pre)
        recs1p.e00200 += 1.0  # pylint: disable=no-member
        recs1p.e00200p += 1.0  # pylint: disable=no-member
        policy1p = Policy(gfactors=growfactors_pre)
        calc1p = Calculator(policy=policy1p, records=recs1p,
                            consumption=consump)
        calc_at_start_year(calc1p)
        # mask is true where the extra dollar changed income tax liability
        res1p = calc1p.dataframe(DIST_VARIABLES)
        unchanged = np.isclose(res1.iitax, res1p.iitax,
                               atol=0.001, rtol=0.0)
        mask = np.logical_not(unchanged)  # pylint: disable=no-member
        assert np.any(mask)

    # Behavior instance
    behv = Behavior()
    behv.update_behavior(user_mods['behavior'])

    # never allow both behavioral response and growdiff response
    if behv.has_any_response() and growdiff_response.has_any_response():
        raise ValueError('BOTH behavior AND growdiff_response HAVE RESPONSE')

    # optionally disallow behavioral response
    if behv.has_any_response() and not behavior_allowed:
        raise ValueError('A behavior RESPONSE IS NOT ALLOWED')

    # post-reform Calculator instance
    recs2 = make_records(growfactors_post)
    policy2 = Policy(gfactors=growfactors_post)
    policy2.implement_reform(user_mods['policy'])
    calc2 = Calculator(policy=policy2, records=recs2,
                       consumption=consump, behavior=behv)
    calc_at_start_year(calc2)

    # move both Calculator objects forward year_n years, then calculate
    for _ in range(0, year_n):
        calc1.increment_year()
        calc2.increment_year()
    calc1.calc_all()
    if calc2.behavior.has_response():
        calc2 = Behavior.response(calc1, calc2)
    else:
        calc2.calc_all()

    return (calc1, calc2, mask)
def test_agg(tests_path, cps_fullsample):
    """
    Check aggregate taxes computed from the cps.csv file under the
    2017_law.json baseline policy.

    The full-sample diagnostic table is compared with cpscsv_agg_expect.txt
    (non-small differences write cpscsv_agg_actual.txt and raise ValueError),
    and then the combined liability from a fixed-seed sub-sample is compared
    year by year with the full-sample value.
    """
    # pylint: disable=too-many-statements,too-many-locals
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline policy is a default Policy with the 2017_law.json reform
    law_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    law_params = Calculator.read_json_param_objects(law_path, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(law_params['policy'])
    # full-sample Calculator and its aggregate diagnostic table
    full_records = Records.cps_constructor(data=cps_fullsample)
    full_calc = Calculator(policy=baseline_policy, records=full_records)
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    full_taxes = full_table.loc[liability_row]
    # render the table as text that ends with exactly one EOL character
    actual_text = full_table.to_string(float_format='%8.1f') + '\n'
    # expected text has its trailing whitespace normalized to one EOL
    expect_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        raw_expect = expect_file.read()
    expect_text = raw_expect.rstrip('\n\t ') + '\n'
    # only differences larger than a small value count as failures
    if nonsmall_diffs(actual_text.splitlines(True),
                      expect_text.splitlines(True)):
        # expect path ends in 'expect.txt' (10 chars); swap in 'actual.txt'
        actual_path = '{}{}'.format(expect_path[:-10], 'actual.txt')
        with open(actual_path, 'w') as actual_file:
            actual_file.write(actual_text)
        raise ValueError(''.join([
            'CPSCSV AGG RESULTS DIFFER\n',
            '-------------------------------------------------\n',
            '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n',
            '--- if new OK, copy cpscsv_agg_actual.txt to ---\n',
            '--- cpscsv_agg_expect.txt ---\n',
            '--- and rerun test. ---\n',
            '-------------------------------------------------\n',
        ]))
    # recompute the table using an unweighted sub-sample of the records
    seed = 180  # fixed so the sub-sample is always the same
    fraction = 0.03  # sub-sample fraction
    sub_data = cps_fullsample.sample(frac=fraction, random_state=seed)
    sub_calc = Calculator(policy=baseline_policy,
                          records=Records.cps_constructor(data=sub_data))
    sub_taxes = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect one report per year whose sub-vs-full difference is too large
    reports = []
    for year in range(first_year, first_year + num_years):
        # the first year is allowed a looser tolerance than later years
        rel_tol = 0.014 if year == first_year else 0.006
        sub_tax = sub_taxes[year]
        full_tax = full_taxes[year]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=rel_tol):
            continue
        rel_diff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(year)
        )
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                fraction, rel_tol, seed)
        )
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, rel_diff)
        )
    if reports:
        raise ValueError(''.join(reports))
def test_create_tables(cps_subsample):
    """
    Build difference and distribution tables from a current-law Calculator
    and a reform Calculator (2013 '_II_rt1' set to 0.15) and compare selected
    table columns with hard-coded expected values.

    Every column is checked before the test fails, so that one run prints
    the actual values of all mismatched columns (one value per line, in a
    form that can be pasted into the expected lists below).
    """
    # pylint: disable=too-many-statements,too-many-locals
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    mismatches = []  # 'kind column' labels of columns that differ

    def check_column(kind, table, tabcol, expected, atol, valfmt,
                     equal_nan=False):
        """Record and print table[tabcol] if it differs from expected."""
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(kind, tabcol))
        print(kind, tabcol)
        for val in actual:
            print(valfmt.format(val))

    def difference_table(groupby, tax_to_diff):
        """Return the calc1-vs-calc2 difference table for groupby."""
        table = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                        calc2.dataframe(DIFF_VARIABLES),
                                        groupby=groupby,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax_to_diff)
        assert isinstance(table, pd.DataFrame)
        return table

    def distribution_table(groupby):
        """Return the calc2 weighted-sum distribution table for groupby."""
        table = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                          groupby=groupby,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        return table

    # test creating various difference tables

    diff = difference_table('large_income_bins', 'combined')
    expected = [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83,
                -0.81, -0.73, -0.65, -0.18, -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 0.005, '{:.2f},', equal_nan=True)

    diff = difference_table('standard_income_bins', 'iitax')
    expected = [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83,
                -0.81, -0.73, -0.65, -0.23, -0.09, -0.06, -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 0.005, '{:.2f},', equal_nan=True)

    diff = difference_table('small_income_bins', 'iitax')
    expected = [np.nan, np.nan, -0.29, -0.07, -0.23, -0.78, -0.66,
                -0.74, -0.70, -0.83, -0.81, -0.73, -0.65, -0.23, -0.09,
                -0.08, -0.07, -0.05, -0.02, np.nan, -0.59]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 0.005, '{:.2f},', equal_nan=True)

    diff = difference_table('weighted_deciles', 'combined')
    expected = [0, 0, 1037894, 16199646, 25518793, 34455230, 49661093,
                62344194, 82290396, 90006817, 117415735, 101818106,
                580747904, 62408600, 33771695, 5637811]
    check_column('diff', diff, 'tot_change', expected, 0.51, '{:.0f},')
    expected = [0.00, 0.00, 0.18, 2.79, 4.39, 5.93, 8.55, 10.74, 14.17,
                15.50, 20.22, 17.53, 100.00, 10.75, 5.82, 0.97]
    check_column('diff', diff, 'share_of_change', expected,
                 0.005, '{:.2f},')
    # NOTE: this pc_aftertaxinc check used to appear twice, verbatim;
    # the second copy added no coverage and has been removed
    expected = [np.nan, np.nan, -0.13, -0.65, -0.68, -0.71, -0.79,
                -0.80, -0.82, -0.71, -0.71, -0.30, -0.59, -0.55, -0.25,
                -0.06]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 0.005, '{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = distribution_table('weighted_deciles')
    expected = [0, 0, -54678669, -64005792, -64426464, 32739840,
                207396898, 317535861, 575238615, 984782596, 1731373913,
                7082515174, 10748471972, 1622921432, 2217477146,
                3242116596]
    check_column('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [0, 0, 2561, 13268, 21368, 28377, 53186, 60433, 79779,
                91010, 117445, 128784, 596211, 63766, 51681, 13337]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 0.5, '{:.0f},')
    expected = [0, 0, 836765692, 2661991174, 3978757611, 5306258004,
                7022134388, 8871843614, 11530190180, 14721635194,
                19860290487, 44177752076, 118967618420, 14296456955,
                16895894429, 12985400692]
    check_column('dist', dist, 'expanded_income', expected,
                 0.5, '{:.0f},')
    expected = [0, 0, 821526457, 2483359936, 3714540881, 4821394144,
                6200512981, 7763298300, 9921184240, 12527297334,
                16314596486, 33886371300, 98454082058, 11265497052,
                13416447851, 9204426396]
    check_column('dist', dist, 'aftertax_income', expected,
                 0.5, '{:.0f},')

    dist = distribution_table('standard_income_bins')
    expected = [0, 0, -43150804, -77526808, -64845122, 43303823,
                225370761, 723847940, 1098042284, 3264499170, 2808160213,
                950296405, 1820474110, 10748471972]
    check_column('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [0, 0, 1202, 13614, 27319, 33655, 50186, 116612, 103896,
                181192, 60527, 5126, 2882, 596211]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 0.5, '{:.0f},')

    # fail once, after all columns have been checked and printed
    assert not mismatches, (
        'table columns differ from expected values: ' +
        ', '.join(mismatches)
    )
def test_create_distribution_tables(pit_fullsample, cit_fullsample):
    # Build distribution tables from a reform Calculator that uses both
    # individual (Records) and corporate (CorpRecords) data, and compare the
    # 'pitax' and 'GTI' columns with hard-coded expected values.
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records(data=pit_fullsample)
    crec = CorpRecords(data=cit_fullsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec, corprecords=crec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2017: {'_rate2': [0.06]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec, corprecords=crec)
    calc2.calc_all()
    # set to True by any column mismatch below
    # NOTE(review): no assertion on test_failure is visible in this block,
    # so a mismatch only prints values -- confirm whether the check was
    # lost or lives beyond the triple-quote that ends this block
    test_failure = False
    # test creating various distribution tables
    # (only the first of the two returned tables is used here)
    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    tabcol = 'pitax'
    expected = [0.000, 0.000, 0.000, 0.000, 0.000, 1.962, 5.711, 14.602,
                45.503, 163.177, 397.795, 1018.520, 1647.270, 331.218,
                384.399, 302.903]
    # np.allclose is called with its default tolerances here
    if not np.allclose(dist[tabcol].values, expected):
        test_failure = True
        # print actual values one per line for pasting into expected
        print('dist xdec', tabcol)
        for val in dist[tabcol].values:
            print('{:.3f},'.format(val))
    tabcol = 'GTI'
    expected = [0.000, 0.000, 688.359, 893.687, 1107.005, 1332.670,
                1605.580, 1824.545, 2327.660, 2818.092, 3848.954,
                6071.569, 22518.121, 2490.655, 2119.235, 1461.678]
    if not np.allclose(dist[tabcol].tolist(), expected):
        test_failure = True
        print('dist xdec', tabcol)
        for val in dist[tabcol].values:
            print('{:.3f},'.format(val))
    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    tabcol = 'pitax'
    expected = [0.000, 0.000, 8.334, 279.113, 542.762, 401.310, 415.751,
                0.000, 0.000, 0.000, 0.000, 1647.270]
    if not np.allclose(dist[tabcol], expected):
        test_failure = True
        print('dist xbin', tabcol)
        for val in dist[tabcol].values:
            print('{:.3f},'.format(val))
    tabcol = 'GTI'
    expected = [0.000, 0.000, 5884.790, 7399.792, 4810.526, 2392.643,
                2030.370, 0.000, 0.000, 0.000, 0.000, 22518.121]
    if not np.allclose(dist[tabcol].tolist(), expected):
        test_failure = True
        # NOTE(review): label says 'xdec' although this is the
        # standard_income_bins table -- presumably meant 'dist xbin'
        print('dist xdec', tabcol)
        for val in dist[tabcol].values:
            print('{:.3f},'.format(val))
# NOTE(review): the triple-quote below has no visible partner in this part
# of the file; it is kept as found -- confirm what string it opens or closes
"""
def test_reform_json_and_output(tests_path):
    """
    Check that each JSON reform file can be converted into a reform
    dictionary that can then be passed to the Policy class implement_reform()
    method that generates no reform_errors.
    Then use each reform to generate static tax results for small set of
    filing units in a single tax_year and compare those results with
    expected results from a text file.

    For each reform (and for current-law policy, 'clp') an abbreviated
    distribution table is written to a .res file next to the reform file and
    compared with the corresponding .out file.  A matching .res file is
    removed; a differing one is left in place and reported.

    Raises ValueError if a reform file has no "policy" key or if any
    .res file differs from its .out file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    used_dist_stats = ['c00100',  # AGI
                       'c04600',  # personal exemptions
                       'standard',  # standard deduction
                       'c04800',  # regular taxable income
                       'c05800',  # income tax before credits
                       'iitax',  # income tax after credits
                       'payrolltax',  # payroll taxes
                       'aftertax_income']  # aftertax expanded income
    renamed_columns = {'c00100': 'AGI',
                       'c04600': 'pexempt',
                       'standard': 'stdded',
                       'c04800': 'taxinc',
                       'c05800': 'tax-wo-credits',
                       'iitax': 'inctax',
                       'payrolltax': 'paytax',
                       'aftertax_income': 'ataxinc'}

    # embedded function used only in test_reform_json_and_output
    def write_distribution_table(calc, resfilename):
        """
        Write abbreviated distribution table calc to file with resfilename.
        """
        dist, _ = calc.distribution_tables(None, groupby='large_income_bins')
        # selecting the used columns also drops every unused column
        dist = dist[used_dist_stats].rename(mapper=renamed_columns,
                                            axis='columns')
        # set the float format only while writing, so the pandas display
        # option is restored afterwards and cannot leak into other tests
        with pd.option_context('display.float_format', '{:7.0f}'.format):
            with open(resfilename, 'w') as resfile:
                dist.to_string(resfile)

    # embedded function used only in test_reform_json_and_output
    def res_and_out_are_same(base):
        """
        Return true if base.res and base.out file contents are the same;
        return false if base.res and base.out file contents differ.
        """
        with open(base + '.out') as outfile:
            exp = outfile.read().splitlines(True)
        with open(base + '.res') as resfile:
            act = resfile.read().splitlines(True)
        # check that act & exp have differences no more than small value
        return not nonsmall_diffs(act, exp, small=1e-6)

    # list of .res paths that differ from their .out files
    failures = []

    # embedded function used only in test_reform_json_and_output
    def write_and_compare(calc, base):
        """
        Write base.res from calc; remove it if it matches base.out,
        otherwise record it as a failure.
        """
        res_path = base + '.res'
        write_distribution_table(calc, res_path)
        if res_and_out_are_same(base):
            os.remove(res_path)
        else:
            failures.append(res_path)

    # specify Records object containing cases data
    tax_year = 2020
    reforms_dir = os.path.join(tests_path, '..', 'reforms')
    cases_path = os.path.join(reforms_dir, 'cases.csv')
    cases = Records(data=cases_path,
                    gfactors=None,  # keeps raw data unchanged
                    weights=None,
                    adjust_ratios=None,
                    start_year=tax_year)  # set raw input data year
    # specify current-law-policy Calculator object
    calc1 = Calculator(policy=Policy(), records=cases, verbose=False)
    calc1.advance_to_year(tax_year)
    calc1.calc_all()
    write_and_compare(calc1, os.path.join(reforms_dir, 'clp'))
    # check reform file contents and reform results for each reform;
    # sorted so that processing and failure-report order are deterministic
    reforms_path = os.path.join(reforms_dir, '*.json')
    for jrf in sorted(glob.glob(reforms_path)):
        # read contents of jrf (JSON reform file)
        with open(jrf, 'r') as jfile:
            jrf_text = jfile.read()
        # check that jrf_text has "policy" that can be implemented as a reform
        if '"policy"' not in jrf_text:
            msg = 'ERROR: missing policy key in file: {}'
            raise ValueError(msg.format(os.path.basename(jrf)))
        gdiffbase = {}
        gdiffresp = {}
        # pylint: disable=protected-access
        policy_dict = (
            Calculator._read_json_policy_reform_text(jrf_text,
                                                     gdiffbase, gdiffresp)
        )
        pol = Policy()
        pol.implement_reform(policy_dict)
        assert not pol.reform_errors
        calc2 = Calculator(policy=pol, records=cases, verbose=False)
        calc2.advance_to_year(tax_year)
        calc2.calc_all()
        # strip only the file extension, never text elsewhere in the path
        write_and_compare(calc2, os.path.splitext(jrf)[0])
    if failures:
        msg = 'Following reforms have res-vs-out differences:\n'
        msg += ''.join(
            '{}\n'.format(os.path.basename(ref)) for ref in failures
        )
        raise ValueError(msg)
def test_make_Calculator_raises_on_no_policy(records_2009):
    """
    Check that constructing a Calculator with records but without a policy
    argument raises ValueError.
    """
    with pytest.raises(ValueError):
        # result is deliberately not bound: construction itself must raise
        Calculator(records=records_2009)
def test_mtr(tests_path, puf_path):
    """
    Check marginal tax rates computed from puf.csv with no policy reform.

    For each variable in Calculator.MTR_VALID_VARIABLES the payroll-tax and
    income-tax marginal tax rates are binned with mtr_bin_counts(); the
    resulting text is compared with the contents of pufcsv_mtr_expect.txt.
    Any difference writes the actual text to pufcsv_mtr_actual.txt and
    raises ValueError.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # pieces of the actual results text, joined once at the end
    parts = []
    if MTR_NEG_DIFF:
        parts.append('MTR computed using NEGATIVE finite_diff ')
    else:
        parts.append('MTR computed using POSITIVE finite_diff ')
    parts.append('for tax year {}\n'.format(MTR_TAX_YEAR))
    # current-law policy set to the MTR tax year
    clp = Policy()
    clp.set_year(MTR_TAX_YEAR)
    # puf.csv input records
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    calc = Calculator(policy=clp, records=puf)
    parts.append(
        '{} = {}\n'.format('Total number of data records', puf.dim)
    )
    parts.append('PTAX mtr histogram bin edges:\n')
    parts.append(' {}\n'.format(PTAX_MTR_BIN_EDGES))
    parts.append('ITAX mtr histogram bin edges:\n')
    parts.append(' {}\n'.format(ITAX_MTR_BIN_EDGES))
    header = 'PTAX and ITAX mtr histogram bin counts for'
    # one pair of histograms per mtr variable
    for var_name in Calculator.MTR_VALID_VARIABLES:
        is_e01400 = (var_name == 'e01400')
        (ptax_mtr, itax_mtr, _) = calc.mtr(
            variable_str=var_name,
            negative_finite_diff=MTR_NEG_DIFF,
            zero_out_calculated_vars=is_e01400,
            wrt_full_compensation=False
        )
        if is_e01400:
            # calculated variables must be mutually consistent
            crs = calc.records
            assert np.allclose(crs.iitax + crs.payrolltax,
                               crs.combined)
            assert np.allclose(crs.ptax_was + crs.setax + crs.ptax_amc,
                               crs.payrolltax)
            assert np.allclose(crs.c21060 - crs.c21040,
                               crs.c04470)
            assert np.allclose(crs.taxbc + crs.c09600,
                               crs.c05800)
            assert np.allclose(crs.c05800 + crs.othertaxes - crs.c07100,
                               crs.c09200)
            assert np.allclose(crs.c09200 - crs.refund,
                               crs.iitax)
        if var_name == 'e00200s':
            # only MARS==2 filing units have valid MTR values
            ptax_mtr = ptax_mtr[calc.records.MARS == 2]
            itax_mtr = itax_mtr[calc.records.MARS == 2]
        parts.append('{} {}:\n'.format(header, var_name))
        parts.append(mtr_bin_counts(ptax_mtr, PTAX_MTR_BIN_EDGES, recid))
        parts.append(mtr_bin_counts(itax_mtr, ITAX_MTR_BIN_EDGES, recid))
    res = ''.join(parts)
    # expected text has its trailing whitespace normalized to one EOL
    expect_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(expect_path, 'r') as expect_file:
        raw_expect = expect_file.read()
    expect_lines = (raw_expect.rstrip('\n\t ') + '\n').splitlines(True)
    actual_lines = res.splitlines(True)
    # materialize the diff generator; any line at all means failure
    diff_lines = list(difflib.unified_diff(expect_lines, actual_lines,
                                           fromfile='expected',
                                           tofile='actual', n=0))
    if diff_lines:
        # expect path ends in 'expect.txt' (10 chars); swap in 'actual.txt'
        actual_path = '{}{}'.format(expect_path[:-10], 'actual.txt')
        with open(actual_path, 'w') as actual_file:
            actual_file.write(res)
        raise ValueError(''.join([
            'PUFCSV MTR RESULTS DIFFER\n',
            '-------------------------------------------------\n',
            '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n',
            '--- if new OK, copy pufcsv_mtr_actual.txt to ---\n',
            '--- pufcsv_mtr_expect.txt ---\n',
            '--- and rerun test. ---\n',
            '-------------------------------------------------\n',
        ]))
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv

    The full-sample diagnostic table must match pufcsv_agg_expect.txt
    exactly; otherwise the table is written to pufcsv_agg_actual.txt and a
    ValueError is raised.  The "Combined Liability ($b)" row computed from
    a fixed-seed sub-sample must then be within a relative tolerance of
    the full-sample row, otherwise a ValueError listing the offending
    years is raised.
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # current-law policy applied to every puf.csv record
    current_law = Policy()
    all_records = Records(data=puf_fullsample)
    full_calc = Calculator(policy=current_law, records=all_records)
    calc_start_year = full_calc.current_year
    # aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = full_calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # table rendered as text with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        raw_expected = expected_file.read()
    # cleanup end of file txt
    expected = (raw_expected.rstrip('\n\t ') + '\n').splitlines(True)
    diff_lines = list(
        difflib.unified_diff(expected, actual,
                             fromfile='expected', tofile='actual', n=0)
    )
    if diff_lines:
        # any difference is a failure: save the actual table, then raise
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        raise ValueError(''.join((
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n',
            '-------------------------------------------------\n',
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n',
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n',
            '--- pufcsv_agg_expect.txt ---\n',
            '--- and rerun test. ---\n',
            '-------------------------------------------------\n',
        )))
    # diagnostic table computed from an unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = puf_fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # report every year whose relative difference exceeds the tolerance
    report = [
        'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n',
        'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                             reltol,
                                                             rn_seed),
    ]
    row_fmt = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    paired = zip(np.asarray(taxes_subsample), np.asarray(taxes_fullsample))
    for offset, (sub_value, all_value) in enumerate(paired):
        cyr = offset + calc_start_year
        tax_sub = float(sub_value)
        tax_all = float(all_value)
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            report.append(row_fmt.format(cyr, tax_sub, tax_all, reldiff))
    raise ValueError(''.join(report))
def test_reform_json_and_output():
    """
    Check that each JSON reform file can be converted into a reform
    dictionary that can then be passed to the Policy class implement_reform
    method that generates no parameter_errors.
    Then use each reform to generate static tax results for small set of
    filing units in a single tax_year and compare those results with
    expected results from a CSV-formatted file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    tax_year = 2020
    failures = list()  # result files that differ from expected output

    # embedded function used only in test_reform_json_and_output
    def write_res_file(calc, resfilename):
        """
        Write calc output to CSV-formatted file with resfilename.
        """
        # varnames:        AGI       STD         TaxInc    ITAX     PTAX
        columns = ['RECID', 'c00100', 'standard', 'c04800', 'iitax',
                   'payrolltax']
        table = calc.dataframe(columns)
        table['RECID'] = table['RECID'].astype(int)
        with open(resfilename, 'w') as resfile:
            table.to_csv(resfile, index=False, float_format='%.2f')

    # embedded function used only in test_reform_json_and_output
    def res_and_out_are_same(base):
        """
        Return True if base.res.csv and base.out.csv file contents are same;
        return False if base.res.csv and base.out.csv file contents differ.
        """
        resdf = pd.read_csv(base + '.res.csv')
        outdf = pd.read_csv(base + '.out.csv')
        # every result column is examined (no short-circuit): a column
        # matches when it is present in the expected file and close to it
        matches = [
            col in outdf and bool(np.allclose(resdf[col], outdf[col]))
            for col in resdf
        ]
        return all(matches)

    # embedded function used only in test_reform_json_and_output
    def calculate_and_compare(policy, res_path):
        """
        Calculate tax_year results under policy, write them to res_path,
        and either remove res_path (results match the expected output)
        or record res_path in failures (results differ).
        """
        calc = Calculator(policy=policy, records=cases, verbose=False)
        calc.advance_to_year(tax_year)
        calc.calc_all()
        write_res_file(calc, res_path)
        if res_and_out_are_same(res_path.replace('.res.csv', '')):
            os.remove(res_path)
        else:
            failures.append(res_path)

    # specify Records object containing cases data
    cases_path = os.path.join(CUR_PATH, '..', 'taxcalc', 'cases.csv')
    cases = Records(data=cases_path,
                    start_year=tax_year,  # set raw input data year
                    gfactors=None,  # keeps raw data unchanged
                    weights=None,
                    adjust_ratios=None)
    # current-law-policy results
    calculate_and_compare(Policy(),
                          cases_path.replace('cases.csv', 'clp.res.csv'))
    # read 2017_law.json reform file and specify its parameters dictionary
    pre_tcja_jrf = os.path.join(CUR_PATH, '..', 'taxcalc', '2017_law.json')
    pre_tcja = Policy.read_json_reform(pre_tcja_jrf)
    # check reform file contents and reform results for each reform
    reforms_path = os.path.join(CUR_PATH, '..', 'taxcalc', '*.json')
    for jrf in glob.glob(reforms_path):
        # determine reform's baseline by reading contents of jrf
        with open(jrf, 'r') as rfile:
            jrf_text = rfile.read()
        pre_tcja_baseline = 'Reform_Baseline: 2017_law.json' in jrf_text
        # implement the reform relative to its baseline
        reform = Policy.read_json_reform(jrf_text)
        pol = Policy()  # current-law policy
        if pre_tcja_baseline:
            pol.implement_reform(pre_tcja)
            assert not pol.parameter_errors
        pol.implement_reform(reform)
        assert not pol.parameter_errors
        calculate_and_compare(pol, jrf.replace('.json', '.res.csv'))
    if failures:
        names = ['{}\n'.format(os.path.basename(ref)) for ref in failures]
        raise ValueError(
            'Following reforms have res-vs-out differences:\n' + ''.join(names)
        )
def test_itemded_component_amounts(year, cvname, hcname, puf_fullsample):
    """
    Check that all c04470 components are adjusted to reflect the filing
    unit's standard-vs-itemized-deduction decision.  Check for 2018
    (when current law has no Pease phaseout of itemized deductions and
    already has complete haircuts for Casualty and Miscellaneous deductions)
    and 2017 (when current law has a Pease phaseout of itemized deductions
    and has no haircuts).  The calcfunctions.py code makes no attempt to
    adjust the components for the effects of Pease-like phaseout or any other
    type of limitation on total itemized deductions, so the pre-2018 tests
    here use c21060, instead of c04470, as the itemized deductions total.

    Raises ValueError when year is neither 2017 nor 2018, or when the
    weighted total of cvname differs from the change in total itemized
    deductions caused by setting the hcname parameter to 1.0.
    """
    # pylint: disable=too-many-locals
    recs = Records(data=puf_fullsample)
    no_std = [0.0, 0.0, 0.0, 0.0, 0.0]
    # everybody itemizes deductions and all deductions are allowed
    all_allowed = Policy()
    all_allowed.implement_reform({
        'STD_Aged': {year: list(no_std)},
        'STD': {year: list(no_std)}
    })
    assert not all_allowed.parameter_errors
    # everybody itemizes deductions but the hcname deduction is disallowed
    one_disallowed = Policy()
    one_disallowed.implement_reform({
        'STD_Aged': {year: list(no_std)},
        'STD': {year: list(no_std)},
        hcname: {year: 1.0}
    })
    assert not one_disallowed.parameter_errors
    # compute tax liability in specified year
    calc1 = Calculator(policy=all_allowed, records=recs, verbose=False)
    calc1.advance_to_year(year)
    calc1.calc_all()
    calc2 = Calculator(policy=one_disallowed, records=recs, verbose=False)
    calc2.advance_to_year(year)
    calc2.calc_all()
    # confirm that nobody is taking the standard deduction
    for calc in (calc1, calc2):
        assert np.allclose(calc.array('standard'), 0.)
    # total itemized deductions variable depends on the year:
    #   2017 ==> pre-Pease limitation total itemized deductions
    #   2018 ==> total itemized deductions (no Pease-like limitation)
    total_variable = {2017: 'c21060', 2018: 'c04470'}
    if year not in total_variable:
        raise ValueError('illegal year value = {}'.format(year))
    total_name = total_variable[year]
    itmded1 = calc1.weighted_total(total_name) * 1e-9
    itmded2 = calc2.weighted_total(total_name) * 1e-9
    difference_in_total_itmded = itmded1 - itmded2
    # calculate itemized component amount
    component_amt = calc1.weighted_total(cvname) * 1e-9
    # confirm that component amount is equal to difference in total deductions
    looser_2017_atol = {'c19700': 0.009, 'c19200': 0.010}
    if year == 2017:
        atol = looser_2017_atol.get(cvname, 0.00001)
    else:
        atol = 0.00001
    if np.allclose(component_amt, difference_in_total_itmded, atol=atol):
        return
    txt = '\n{}={:.3f} != {:.3f}=difference_in_total_itemized_deductions'
    raise ValueError(
        txt.format(cvname, component_amt, difference_in_total_itmded)
    )
def test_agg(tests_path):
    """
    Test current-law aggregate taxes using cps.csv file.

    The full-sample diagnostic table is compared line by line with the
    contents of cpscsv_agg_expect.txt in tests_path, tolerating numeric
    differences smaller than a Python-version-dependent "small" value;
    when non-small differences exist the table is written to
    cpscsv_agg_actual.txt and a ValueError is raised.  Then the
    "Combined Liability ($b)" row from a fixed-seed sub-sample is compared
    with the full-sample row (skipping the first year), and a ValueError
    listing the offending years is raised when any relative difference
    exceeds the tolerance.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    # create a Records object (rec) containing all cps.csv input records
    rec = Records.cps_constructor()
    # create a Calculator object using clp policy and cps records
    calc = Calculator(policy=clp, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    act = actual_results.splitlines(True)
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    exp = expected_results.splitlines(True)
    # ensure act and exp line lists have differences less than "small" value
    epsilon = 1e-6
    if sys.version_info.major == 2:
        small = epsilon  # tighter test for Python 2.7
    else:
        small = 0.1 + epsilon  # looser test for Python 3.6
    diff_lines = list()
    assert len(act) == len(exp)
    for actline, expline in zip(act, exp):
        if actline == expline:
            continue
        diffs = line_diff_list(actline, expline, small)
        if diffs:
            diff_lines.extend(diffs)
    # test failure if there are any diff_lines
    if diff_lines:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
        msg += '--- cpscsv_agg_expect.txt ---\n'
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        for line in diff_lines:
            msg += line
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    cps_filepath = os.path.join(tests_path, '..', 'cps.csv.gz')
    fullsample = pd.read_csv(cps_filepath)
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample,
                            gfactors=Growfactors(),
                            weights=Records.CPS_WEIGHTS_FILENAME,
                            adjust_ratios=Records.CPS_RATIOS_FILENAME,
                            start_year=Records.CPSCSV_YEAR)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    # TODO: skip first year because of BUG in cps_weights.csv file
    skip_years = 1  # TODO: eliminate code
    taxes_subsample = taxes_subsample[skip_years:]  # TODO: eliminate code
    taxes_fullsample = taxes_fullsample[skip_years:]  # TODO: eliminate code
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'CPSCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                                    reltol,
                                                                    rn_seed)
        msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
        paired = zip(np.asarray(taxes_subsample),
                     np.asarray(taxes_fullsample))
        for idx, (sub_value, all_value) in enumerate(paired):
            # the sliced series start skip_years after calc_start_year, so
            # the offset must be added back to report the correct year
            cyr = calc_start_year + skip_years + idx
            tax_sub = float(sub_value)
            tax_all = float(all_value)
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
        raise ValueError(msg)
def test_Calculator_diagnostic_table():
    """
    Smoke test: diagnostic_table() with default arguments can be called on
    a Calculator built from TAXDATA and WEIGHTS with a PUF_YEAR start year.
    """
    current_law = Policy()
    puf_records = Records(
        data=TAXDATA,
        weights=WEIGHTS,
        start_year=Records.PUF_YEAR,
    )
    calculator = Calculator(policy=current_law, records=puf_records)
    calculator.diagnostic_table()
def test_Calculator_attr_access_to_policy(records_2009):
    """
    Check that a Calculator exposes a records attribute holding c01000 and
    a policy attribute holding _AMT_Child_em.
    """
    current_law = Policy()
    calculator = Calculator(policy=current_law, records=records_2009)
    # records variable reachable through the calculator
    assert hasattr(calculator.records, 'c01000')
    # policy parameter reachable through the calculator
    assert hasattr(calculator.policy, '_AMT_Child_em')
    # the policy attribute itself
    assert hasattr(calculator, 'policy')
def test_make_Calculator():
    """
    Check that a Calculator built from a default Policy (current_year 2013)
    and Records with start_year 2009 reports current_year 2013.
    """
    expected_year = 2013
    policy = Policy()
    assert policy.current_year == expected_year
    records = Records(data=TAXDATA, weights=WEIGHTS, start_year=2009)
    calculator = Calculator(policy=policy, records=records)
    assert calculator.current_year == expected_year
def test_Calculator_create_diagnostic_table(records_2009):
    """
    Check that create_diagnostic_table returns a Pandas DataFrame for a
    Calculator on which calc_all() has been called.
    """
    calculator = Calculator(policy=Policy(), records=records_2009)
    calculator.calc_all()
    table = create_diagnostic_table(calculator)
    assert isinstance(table, pd.DataFrame)
def test_compatible_data(cps_subsample, puf_subsample,
                         allparams, reform_xx,
                         tc_objs, allparams_batch):
    """
    Test that the compatible_data attribute in policy_current_law.json
    is accurate by implementing the min and max values of each parameter
    as reforms and ensuring that revenue differs from baseline for at
    least one of these reforms when using datasets marked compatible and
    does not differ when using datasets marked as incompatible.

    For each parameter in allparams_batch, the range max (and, only when
    the max reform changes nothing, the range min) is implemented on top
    of reform_xx in XX_YEAR, results are calculated for TEST_YEAR, and the
    weighted total is compared with the one from the c_xx calculator taken
    from the tc_objs fixture.  Inconsistencies between the observed change
    and the parameter's compatible_data flag are accumulated in an error
    string; if any were found they are printed and the test fails.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # pylint: disable=too-many-statements,too-many-branches

    # Get taxcalc objects from tc_objs fixture
    rec_xx, c_xx, puftest = tc_objs

    # These parameters are exempt because they are not active under
    # current law and activating them would deactivate other parameters,
    # or if it is difficult to devise a test for them.
    exempt_from_testing = ['_CG_ec', '_CG_reinvest_ec_rt',
                           '_ACTC_ChildNum', '_CR_SchR_hc']

    # Loop through the parameters in allparams_batch
    errmsg = 'ERROR: {} {}\n'
    errors = ''
    for pname in allparams_batch:
        param = allparams_batch[pname]
        max_listed = param['range']['max']
        # handle links to other params or self
        if isinstance(max_listed, str):
            if max_listed == 'default':
                # 'default' links to the parameter's own last listed value
                max_val = param['value'][-1]
            else:
                # any other string names another parameter in allparams
                max_val = allparams[max_listed]['value'][0]
        if not isinstance(max_listed, str):
            if isinstance(param['value'][0], list):
                # vector parameter: repeat the bound for every element
                max_val = [max_listed] * len(param['value'][0])
            else:
                max_val = max_listed
        min_listed = param['range']['min']
        if isinstance(min_listed, str):
            if min_listed == 'default':
                min_val = param['value'][-1]
            else:
                min_val = allparams[min_listed]['value'][0]
        if not isinstance(min_listed, str):
            if isinstance(param['value'][0], list):
                min_val = [min_listed] * len(param['value'][0])
            else:
                min_val = min_listed
        # create reform dictionaries
        max_reform = copy.deepcopy(reform_xx)
        min_reform = copy.deepcopy(reform_xx)
        max_reform[XX_YEAR][str(pname)] = [max_val]
        min_reform[XX_YEAR][str(pname)] = [min_val]
        # assess whether max reform changes results
        if puftest:
            rec_yy = Records(data=puf_subsample)
        else:
            rec_yy = Records.cps_constructor(data=cps_subsample)
        p_yy = Policy()
        p_yy.implement_reform(max_reform, raise_errors=False)
        c_yy = Calculator(policy=p_yy, records=rec_yy, verbose=False)
        c_yy.advance_to_year(TEST_YEAR)
        c_yy.calc_all()
        # _BEN*_repeal parameters are judged by benefit cost, all other
        # parameters by combined tax liability
        if pname.startswith('_BEN') and pname.endswith('_repeal'):
            max_reform_change = (
                c_yy.weighted_total('benefit_cost_total') -
                c_xx.weighted_total('benefit_cost_total')
            )
        else:
            max_reform_change = (
                c_yy.weighted_total('combined') -
                c_xx.weighted_total('combined')
            )
        min_reform_change = 0
        # assess whether min reform changes results, if max reform did not
        if max_reform_change == 0:
            p_yy = Policy()
            p_yy.implement_reform(min_reform, raise_errors=False)
            # NOTE(review): this calculator uses rec_xx (the fixture records)
            # and default verbosity, unlike the max-reform calculator above,
            # which uses rec_yy and verbose=False -- confirm this is intended
            c_yy = Calculator(policy=p_yy, records=rec_xx)
            c_yy.advance_to_year(TEST_YEAR)
            c_yy.calc_all()
            if pname.startswith('_BEN') and pname.endswith('_repeal'):
                min_reform_change = (
                    c_yy.weighted_total('benefit_cost_total') -
                    c_xx.weighted_total('benefit_cost_total')
                )
            else:
                min_reform_change = (
                    c_yy.weighted_total('combined') -
                    c_xx.weighted_total('combined')
                )
            # neither reform changed results, so the dataset flag being
            # True is reported as an error (unless the parameter is exempt)
            if min_reform_change == 0 and pname not in exempt_from_testing:
                if puftest:
                    if param['compatible_data']['puf'] is True:
                        errors += errmsg.format(pname, 'is not True for puf')
                else:
                    if param['compatible_data']['cps'] is True:
                        errors += errmsg.format(pname, 'is not True for cps')
        # some reform changed results, so the dataset flag being False is
        # reported as an error
        if max_reform_change != 0 or min_reform_change != 0:
            if puftest:
                if param['compatible_data']['puf'] is False:
                    errors += errmsg.format(pname, 'is not False for puf')
            else:
                if param['compatible_data']['cps'] is False:
                    errors += errmsg.format(pname, 'is not False for cps')
    # test failure if any errors
    if errors:
        print(errors)
        # comparison of two different string literals: always fails
        assert 'compatible_data' == 'invalid'
def test_make_Calculator_deepcopy(records_2009):
    """
    Check that copy.deepcopy of a Calculator yields a Calculator instance.
    """
    original = Calculator(policy=Policy(), records=records_2009)
    duplicate = copy.deepcopy(original)
    assert isinstance(duplicate, Calculator)
def get_calculator(baseline, calculator_start_year, reform=None,
                   data=None, gfactors=None, weights=None,
                   records_start_year=PUF_START_YEAR):
    '''
    This function creates the tax calculator object with the policy
    specified in reform and the data specified with the data kwarg.

    Args:
        baseline (boolean): True if baseline tax policy; only affects
            which messages are printed
        calculator_start_year (int): first year of budget window; only
            validated against TC_LAST_YEAR in this function
        reform (dictionary): IIT policy reform parameters, None if
            baseline
        data (DataFrame or str): DataFrame or path to datafile for
            Records object; when "cps" in data is true, the CPS records
            constructor is used instead
        gfactors (Tax-Calculator GrowthFactors object): growth factors
            to use to extrapolate data over budget window
        weights (DataFrame): weights for Records object
        records_start_year (int): the start year for the data and
            weights dfs (default is set to the PUF start year as defined
            in the Tax-Calculator project)

    Returns:
        calc1 (Tax-Calculator Calculator object): Calculator object
            built from the policy and records assembled here.
            NOTE(review): this function never advances the calculator
            to calculator_start_year itself -- confirm callers do so.

    Raises:
        RuntimeError: if calculator_start_year > TC_LAST_YEAR
    '''
    # create a calculator
    policy1 = Policy()
    # NOTE(review): for a str this is a substring test; for a DataFrame it
    # presumably tests column labels -- verify against callers
    if data is not None and "cps" in data:
        records1 = Records.cps_constructor()
        # impute short and long term capital gains if using CPS data
        # in 2012 SOI data 6.587% of CG as short-term gains
        records1.p22250 = 0.06587 * records1.e01100
        records1.p23250 = (1 - 0.06587) * records1.e01100
        # set total capital gains to zero
        records1.e01100 = np.zeros(records1.e01100.shape[0])
    elif data is not None:  # pragma: no cover
        records1 = Records(data=data, gfactors=gfactors,
                           weights=weights,
                           start_year=records_start_year)  # pragma: no cover
    else:  # pragma: no cover
        records1 = Records()  # pragma: no cover

    # report which policy is being run
    if baseline:
        if not reform:
            print("Running current law policy baseline")
        else:
            print("Baseline policy is: ", reform)
    else:
        if not reform:
            print("Running with current law as reform")
        else:
            print("Reform policy is: ", reform)
    print("TYPE", type(reform))
    # the reform is passed to implement_reform whether or not baseline is
    # True, and even when reform is None
    policy1.implement_reform(reform)

    # the default set up increments year to 2013
    calc1 = Calculator(records=records1, policy=policy1)

    # Check that start_year is appropriate
    if calculator_start_year > TC_LAST_YEAR:
        raise RuntimeError("Start year is beyond data extrapolation.")

    return calc1
def dropq_calculate(year_n, start_year,
                    taxrec_df, user_mods,
                    behavior_allowed, mask_computed):
    """
    The dropq_calculate function assumes specified user_mods is
      a dictionary returned by the Calculator.read_json_parameter_files()
      function with an extra key:value pair that is specified as
      'gdp_elasticity': {'value': <float_value>}.
    The function returns (calc1, calc2, mask) where
      calc1 is pre-reform Calculator object calculated for year_n,
      calc2 is post-reform Calculator object calculated for year_n, and
      mask is boolean array if mask_computed is true or None otherwise.
    Both calculators are first advanced to start_year and then
      incremented year_n more years before the final calculation.
    A ValueError is raised when both the behavior and growdiff_response
      assumptions have a response, or when behavior has a response and
      behavior_allowed is false.
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-statements

    check_user_mods(user_mods)

    # specify Consumption instance
    consump = Consumption()
    consump_assumptions = user_mods['consumption']
    consump.update_consumption(consump_assumptions)

    # specify growdiff_baseline and growdiff_response
    growdiff_baseline = Growdiff()
    growdiff_response = Growdiff()
    growdiff_base_assumps = user_mods['growdiff_baseline']
    growdiff_resp_assumps = user_mods['growdiff_response']
    growdiff_baseline.update_growdiff(growdiff_base_assumps)
    growdiff_response.update_growdiff(growdiff_resp_assumps)

    # create pre-reform and post-reform Growfactors instances
    # (the post-reform factors get both the baseline and response growdiffs)
    growfactors_pre = Growfactors()
    growdiff_baseline.apply_to(growfactors_pre)
    growfactors_post = Growfactors()
    growdiff_baseline.apply_to(growfactors_post)
    growdiff_response.apply_to(growfactors_post)

    # create pre-reform Calculator instance
    # (each Records object gets its own deep copy of taxrec_df)
    recs1 = Records(data=taxrec_df.copy(deep=True),
                    gfactors=growfactors_pre)
    policy1 = Policy(gfactors=growfactors_pre)
    calc1 = Calculator(policy=policy1, records=recs1, consumption=consump)
    while calc1.current_year < start_year:
        calc1.increment_year()
    calc1.calc_all()
    assert calc1.current_year == start_year

    # optionally compute mask
    if mask_computed:
        # create pre-reform Calculator instance with extra income
        recs1p = Records(data=taxrec_df.copy(deep=True),
                         gfactors=growfactors_pre)
        # add one dollar to total wages and salaries of each filing unit
        recs1p.e00200 += 1.0  # pylint: disable=no-member
        recs1p.e00200p += 1.0  # pylint: disable=no-member
        policy1p = Policy(gfactors=growfactors_pre)
        # create Calculator with recs1p and calculate for start_year
        calc1p = Calculator(policy=policy1p, records=recs1p,
                            consumption=consump)
        while calc1p.current_year < start_year:
            calc1p.increment_year()
        calc1p.calc_all()
        assert calc1p.current_year == start_year
        # compute mask that shows which of the calc1 and calc1p results differ
        res1 = results(calc1.records)
        res1p = results(calc1p.records)
        mask = (res1.iitax != res1p.iitax)
    else:
        mask = None

    # specify Behavior instance
    behv = Behavior()
    behavior_assumps = user_mods['behavior']
    behv.update_behavior(behavior_assumps)

    # always prevent both behavioral response and growdiff response
    if behv.has_any_response() and growdiff_response.has_any_response():
        msg = 'BOTH behavior AND growdiff_response HAVE RESPONSE'
        raise ValueError(msg)

    # optionally prevent behavioral response
    if behv.has_any_response() and not behavior_allowed:
        msg = 'A behavior RESPONSE IS NOT ALLOWED'
        raise ValueError(msg)

    # create post-reform Calculator instance
    recs2 = Records(data=taxrec_df.copy(deep=True),
                    gfactors=growfactors_post)
    policy2 = Policy(gfactors=growfactors_post)
    policy_reform = user_mods['policy']
    policy2.implement_reform(policy_reform)
    calc2 = Calculator(policy=policy2, records=recs2,
                       consumption=consump, behavior=behv)
    while calc2.current_year < start_year:
        calc2.increment_year()
    calc2.calc_all()
    assert calc2.current_year == start_year

    # increment Calculator objects for year_n years and calculate
    for _ in range(0, year_n):
        calc1.increment_year()
        calc2.increment_year()
    calc1.calc_all()
    # with a behavioral response, calc2 is replaced by the object that
    # Behavior.response returns; otherwise it is calculated directly
    if calc2.behavior.has_response():
        calc2 = Behavior.response(calc1, calc2)
    else:
        calc2.calc_all()

    # return calculated Calculator objects and mask
    return (calc1, calc2, mask)
def test_n65(cps_subsample):
    """
    Check that the sum of Calculator.n65() over the CPS sub-sample records
    exceeds 1500.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    total_n65 = calculator.n65().sum()
    assert total_n65 > 1500
def test_behavioral_response_calculator(cps_subsample):
    """
    Exercise Behavior.response under six sets of behavioral-response
    elasticities and check that the resulting weighted total income tax
    liabilities are all different from one another.

    Also checks that Behavior._mtr12 raises ValueError for an invalid
    tax_type and that the Calculator behavior accessors report the
    elasticity values that were set.
    """
    # create Records object
    rec = Records.cps_constructor(data=cps_subsample)
    year = rec.current_year
    # create Policy object
    pol = Policy()
    # create current-law Calculator object
    calc1 = Calculator(policy=pol, records=rec)
    # implement policy reform
    reform = {year: {'_II_rt7': [0.496],
                     '_PT_rt7': [0.496]}}
    pol.implement_reform(reform)
    # create reform Calculator object with no behavioral response
    behv = Behavior()
    calc2 = Calculator(policy=pol, records=rec, behavior=behv)
    # test incorrect use of Behavior._mtr12 method
    with pytest.raises(ValueError):
        Behavior._mtr12(calc1, calc2, mtr_of='e00200p', tax_type='nonsense')
    # vary substitution and income effects in Behavior object
    behavior0 = {year: {'_BE_sub': [0.0],
                        '_BE_cg': [0.0],
                        '_BE_charity': [[0.0, 0.0, 0.0]]}}
    behv0 = Behavior()
    behv0.update_behavior(behavior0)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv0)
    assert calc2.behavior_has_response() is False
    calc2_behv0 = Behavior.response(calc1, calc2)
    behavior1 = {year: {'_BE_sub': [0.3], '_BE_inc': [-0.1], '_BE_cg': [0.0],
                        '_BE_subinc_wrt_earnings': [True]}}
    behv1 = Behavior()
    behv1.update_behavior(behavior1)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv1)
    assert calc2.behavior_has_response() is True
    epsilon = 1e-9
    assert abs(calc2.behavior('BE_sub') - 0.3) < epsilon
    # setting the same value through the accessor must leave it unchanged
    calc2.behavior('BE_sub', 0.3)
    assert abs(calc2.behavior('BE_sub') - 0.3) < epsilon
    assert abs(calc2.behavior('BE_inc') - -0.1) < epsilon
    assert abs(calc2.behavior('BE_cg') - 0.0) < epsilon
    calc2_behv1 = Behavior.response(calc1, calc2)
    behavior2 = {year: {'_BE_sub': [0.5], '_BE_cg': [-0.8]}}
    behv2 = Behavior()
    behv2.update_behavior(behavior2)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv2)
    assert calc2.behavior_has_response() is True
    calc2_behv2 = Behavior.response(calc1, calc2, trace=True)
    behavior3 = {year: {'_BE_inc': [-0.2], '_BE_cg': [-0.8]}}
    behv3 = Behavior()
    behv3.update_behavior(behavior3)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv3)
    assert calc2.behavior_has_response() is True
    calc2_behv3 = Behavior.response(calc1, calc2)
    behavior4 = {year: {'_BE_cg': [-0.8]}}
    behv4 = Behavior()
    behv4.update_behavior(behavior4)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv4)
    assert calc2.behavior_has_response() is True
    calc2_behv4 = Behavior.response(calc1, calc2)
    behavior5 = {year: {'_BE_charity': [[-0.5, -0.5, -0.5]]}}
    behv5 = Behavior()
    behv5.update_behavior(behavior5)
    calc2 = Calculator(policy=pol, records=rec, behavior=behv5)
    assert calc2.behavior_has_response() is True
    calc2_behv5 = Behavior.response(calc1, calc2)
    # check that total income tax liability differs across the
    # six sets of behavioral-response elasticities; a chained
    # "a != b != c" comparison only tests adjacent pairs, so the totals
    # are checked for pairwise distinctness instead
    iitax_totals = [
        calc.weighted_total('iitax')
        for calc in (calc2_behv0, calc2_behv1, calc2_behv2,
                     calc2_behv3, calc2_behv4, calc2_behv5)
    ]
    assert len(set(iitax_totals)) == len(iitax_totals)
def test_calculator_attr_access_to_policy(cps_subsample):
    """
    Check that a Calculator built from CPS sub-sample records exposes a
    records attribute holding c01000 and a policy attribute holding
    _AMT_Child_em.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    # records variable reachable through the calculator
    assert hasattr(calculator.records, 'c01000')
    # policy parameter reachable through the calculator
    assert hasattr(calculator.policy, '_AMT_Child_em')
    # the policy attribute itself
    assert hasattr(calculator, 'policy')
def main(reform_year, calc_year, sub_elasticity, inc_elasticity,
         cg_elasticity):
    """
    Highest-level logic of behavior.py script that produces Tax-Calculator
    behavioral-response results running the taxcalc package on this computer.

    Returns 1 (after writing an error message to stderr) when the
    PUFCSV_PATH file does not exist; otherwise writes static and dynamic
    results for calc_year to stdout and returns 0.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=protected-access
    if not os.path.isfile(PUFCSV_PATH):
        sys.stderr.write('ERROR: file {} does not exist\n'.format(PUFCSV_PATH))
        return 1
    # policy reform: pop the payroll-tax cap and raise capital gains rates
    cg_and_cap_changes = {
        '_SS_Earnings_c': [1.0e99],
        '_CG_rt1': [0.01],  # clp ==> 0.00
        '_CG_rt2': [0.16],  # clp ==> 0.15
        '_CG_rt3': [0.21],  # clp ==> 0.20
    }
    reform_dict = {reform_year: cg_and_cap_changes}
    announce = 'REFORM: pop-the-cap + cg-rate-up-one-percent in {}\n'
    sys.stdout.write(announce.format(reform_year))
    # reform-policy object
    ref = Policy()
    ref.implement_reform(reform_dict)
    # behavioral-response object
    behv = Behavior()
    # reform-policy Calculator object with behavioral responses
    puf_records = Records(data=PUFCSV_PATH)
    calc_ref = Calculator(policy=ref, verbose=False, behavior=behv,
                          records=puf_records)
    cyr = calc_year
    # (a) with all behavioral-response parameters set to zero
    assert not calc_ref.behavior.has_response()
    itax_s, ptax_s, ltcg_s = results(cyr, calc_ref)
    # (b) with behavioral-response parameters set to those specified in call
    elasticities = {
        '_BE_sub': [sub_elasticity],
        '_BE_inc': [inc_elasticity],
        '_BE_cg': [cg_elasticity],
    }
    behv.update_behavior({behv.start_year: elasticities})
    # behv is now used by calc_ref object, so this is the dynamic analysis
    itax_d, ptax_d, ltcg_d = results(cyr, calc_ref)
    # write results to stdout
    yridx = cyr - behv.start_year
    elast_fmt = '{},SUB_ELAST,INC_ELAST,CG_ELAST= {} {} {}\n'
    sys.stdout.write(elast_fmt.format(cyr,
                                      behv._BE_sub[yridx],
                                      behv._BE_inc[yridx],
                                      behv._BE_cg[yridx]))
    row_fmt = '{},{},{}_STATIC(S),{}_DYNAMIC(D),D-S= {:.1f} {:.1f} {:.1f}\n'
    rows = (
        ('ITAX', 'REV', itax_s, itax_d),
        ('PTAX', 'REV', ptax_s, ptax_d),
        ('LTCG', 'AGG', ltcg_s, ltcg_d),
    )
    for label, kind, static, dynamic in rows:
        sys.stdout.write(row_fmt.format(cyr, label, kind, kind,
                                        static, dynamic, dynamic - static))
    # return no-error exit code
    return 0
def test_make_calculator_raises_on_no_policy(cps_subsample):
    """
    Check that constructing a Calculator without a policy argument
    raises ValueError.
    """
    records = Records.cps_constructor(data=cps_subsample)
    with pytest.raises(ValueError):
        Calculator(records=records)
def test_make_Calculator_raises_on_no_policy():
    """
    Check that constructing a Calculator without a policy argument
    raises ValueError.
    """
    rec = Records(data=TAXDATA, weights=WEIGHTS, start_year=2013)
    with pytest.raises(ValueError):
        # the constructor is expected to raise, so its result is not bound
        Calculator(records=rec)
def test_diagnostic_table(cps_subsample):
    """
    Check that Calculator.diagnostic_table(3) returns a Pandas DataFrame
    when using CPS sub-sample records.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    table = calculator.diagnostic_table(3)
    assert isinstance(table, pd.DataFrame)
def test_reform_json_and_output(tests_path):
    """
    Check that each JSON reform file can be converted into a reform dictionary
    that can then be passed to the Policy class implement_reform method that
    generates no parameter_errors.
    Then use each reform to generate static tax results for small set of
    filing units in a single tax_year and compare those results with
    expected results from a text file.

    For every case a <name>.res file is written next to the expected
    <name>.out file; the .res file is removed when the two agree and is
    left in place (and reported) when they differ.

    Raises ValueError listing the base names of all differing .res files.
    """
    # pylint: disable=too-many-statements,too-many-locals
    used_dist_stats = [
        'c00100',  # AGI
        'c04600',  # personal exemptions
        'standard',  # standard deduction
        'c04800',  # regular taxable income
        'c05800',  # income tax before credits
        'iitax',  # income tax after credits
        'payrolltax',  # payroll taxes
        'aftertax_income',  # aftertax expanded income
    ]
    unused_dist_stats = set(DIST_TABLE_COLUMNS) - set(used_dist_stats)
    renamed_columns = {
        'c00100': 'AGI',
        'c04600': 'pexempt',
        'standard': 'stdded',
        'c04800': 'taxinc',
        'c05800': 'tax-wo-credits',
        'iitax': 'inctax',
        'payrolltax': 'paytax',
        'aftertax_income': 'ataxinc',
    }
    # list of .res paths whose contents differ from the matching .out file
    failures = []

    # embedded function used only in test_reform_json_and_output
    def write_distribution_table(calc, resfilename):
        """
        Write abbreviated distribution table calc to file with resfilename.
        """
        dist, _ = calc.distribution_tables(None, 'standard_income_bins',
                                           scaling=False)
        for stat in unused_dist_stats:
            del dist[stat]
        dist = dist[used_dist_stats]
        dist.rename(mapper=renamed_columns, axis='columns', inplace=True)
        with open(resfilename, 'w') as resfile:
            dist.to_string(resfile, float_format='%7.0f')

    # embedded function used only in test_reform_json_and_output
    def res_and_out_are_same(base):
        """
        Return True if base.res and base.out file contents are the same;
        return False if base.res and base.out file contents differ.
        """
        with open(base + '.res') as resfile:
            act_res = resfile.read()
        with open(base + '.out') as outfile:
            exp_res = outfile.read()
        # check to see if act_res & exp_res have differences
        return not nonsmall_diffs(act_res.splitlines(True),
                                  exp_res.splitlines(True))

    # embedded function used only in test_reform_json_and_output
    def check_results(calc, base):
        """
        Write base.res from calc, compare it with base.out, and either
        remove base.res (same) or record it in failures (different).
        """
        res_path = base + '.res'
        write_distribution_table(calc, res_path)
        if res_and_out_are_same(base):
            os.remove(res_path)
        else:
            failures.append(res_path)

    # specify Records object containing cases data
    tax_year = 2020
    reforms_dir = os.path.join(tests_path, '..', 'reforms')
    cases_path = os.path.join(reforms_dir, 'cases.csv')
    cases = Records(data=cases_path,
                    gfactors=None,  # keeps raw data unchanged
                    weights=None,
                    adjust_ratios=None,
                    start_year=tax_year)  # set raw input data year
    # specify current-law-policy Calculator object
    calc1 = Calculator(policy=Policy(), records=cases, verbose=False)
    calc1.advance_to_year(tax_year)
    calc1.calc_all()
    # build the clp base path from path components rather than with
    # str.replace, which would also rewrite matching text in directory names
    check_results(calc1, os.path.join(reforms_dir, 'clp'))
    # read 2017_law.json reform file and specify its parameters dictionary
    pre_tcja_jrf = os.path.join(reforms_dir, '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    # check reform file contents and reform results for each reform;
    # sort because glob returns names in arbitrary order and the failure
    # report should be deterministic
    json_reform_files = sorted(glob.glob(os.path.join(reforms_dir, '*.json')))
    for jrf in json_reform_files:
        # determine reform's baseline by reading contents of jrf
        with open(jrf, 'r') as rfile:
            jrf_text = rfile.read()
        pre_tcja_baseline = 'Reform_Baseline: 2017_law.json' in jrf_text
        # implement the reform relative to its baseline
        reform = Calculator.read_json_param_objects(jrf_text, None)
        pol = Policy()  # current-law policy
        if pre_tcja_baseline:
            pol.implement_reform(pre_tcja['policy'])
        pol.implement_reform(reform['policy'])
        assert not pol.parameter_errors
        calc2 = Calculator(policy=pol, records=cases, verbose=False)
        calc2.advance_to_year(tax_year)
        calc2.calc_all()
        # strip only the trailing .json extension to get the base path
        check_results(calc2, os.path.splitext(jrf)[0])
    if failures:
        msg = 'Following reforms have res-vs-out differences:\n'
        msg += ''.join('{}\n'.format(os.path.basename(ref))
                       for ref in failures)
        raise ValueError(msg)