def test_Calculator_diagnostic_table():
    """
    Smoke-test diagnostic_table on a Calculator that carries a behavioral
    response, passing the calculator itself as the base calculator.
    """
    records = Records(data=TAXDATA, weights=WEIGHTS,
                      start_year=Records.PUF_YEAR)
    behavior = Behavior()
    substitution_update = {2013: {'_BE_sub': [0.4]}}
    behavior.update_behavior(substitution_update)
    # the update above is expected to register as a behavioral response
    assert behavior.has_response()
    calculator = Calculator(policy=Policy(), records=records,
                            behavior=behavior)
    # only checks that the call completes; the returned table is not inspected
    calculator.diagnostic_table(base_calc=calculator)
def test_Calculator_diagnostic_table_no_mutation():
    """
    Check that diagnostic_table leaves current_year unchanged on both the
    calling Calculator and the Calculator passed as base_calc.
    """
    base_policy = Policy()
    base_records = Records(data=TAX_DTA, weights=WEIGHTS,
                           start_year=Records.PUF_YEAR)
    other_policy = Policy()
    other_records = Records(data=TAX_DTA, weights=WEIGHTS,
                            start_year=Records.PUF_YEAR)
    base_calculator = Calculator(policy=base_policy, records=base_records)
    other_calculator = Calculator(policy=other_policy, records=other_records)
    # remember each calculator's year before the table is built
    base_year_before = base_calculator.current_year
    other_year_before = other_calculator.current_year
    other_calculator.diagnostic_table(base_calc=base_calculator)
    assert other_calculator.current_year == other_year_before
    assert base_calculator.current_year == base_year_before
def test_Calculator_diagnostic_table_no_mutation():
    """
    Check that building a diagnostic table does not move the current_year
    of either the calling Calculator or the base_calc Calculator.
    """
    first_policy = Policy()
    first_records = Records(data=TAXDATA, weights=WEIGHTS,
                            start_year=Records.PUF_YEAR)
    second_policy = Policy()
    second_records = Records(data=TAXDATA, weights=WEIGHTS,
                             start_year=Records.PUF_YEAR)
    first_calc = Calculator(policy=first_policy, records=first_records)
    second_calc = Calculator(policy=second_policy, records=second_records)
    # snapshot of (first, second) years taken before the call under test
    first_year, second_year = first_calc.current_year, second_calc.current_year
    second_calc.diagnostic_table(base_calc=first_calc)
    assert second_calc.current_year == second_year
    assert first_calc.current_year == first_year
def test_dist_table_sum_row(cps_subsample):
    """
    Check that the ALL row of the distribution tables agrees across row
    groupings and with the one-year diagnostic table.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    calculator.calc_all()
    # build four distribution tables that differ only in their row grouping
    std_tbl, _ = calculator.distribution_tables(None, 'standard_income_bins')
    soi_tbl, _ = calculator.distribution_tables(None, 'soi_agi_bins')
    dec_tbl, _ = calculator.distribution_tables(None, 'weighted_deciles')
    pop_tbl, _ = calculator.distribution_tables(None, 'weighted_deciles',
                                                pop_quantiles=True)
    # the ALL row must be the same whatever the grouping
    assert np.allclose(std_tbl.loc['ALL'], soi_tbl.loc['ALL'])
    assert np.allclose(std_tbl.loc['ALL'], dec_tbl.loc['ALL'])
    # population count must exceed filing-unit count
    assert pop_tbl.at['ALL', 'count'] > std_tbl.at['ALL', 'count']
    # overwrite the count columns, then require the rest of the ALL row of
    # the population table to match the filing-unit table
    count_columns = ('count', 'count_StandardDed', 'count_ItemDed',
                     'count_AMT')
    for column in count_columns:
        pop_tbl.at['ALL', column] = std_tbl.at['ALL', column]
    assert np.allclose(std_tbl.loc['ALL'], pop_tbl.loc['ALL'])
    # ALL-row tax liabilities must match the diagnostic table entries
    diag_tbl = calculator.diagnostic_table(1)
    dist_taxes = [pop_tbl.at['ALL', 'iitax'],
                  pop_tbl.at['ALL', 'payrolltax']]
    diag_taxes = [diag_tbl.at['Ind Income Tax ($b)', calculator.current_year],
                  diag_tbl.at['Payroll Taxes ($b)', calculator.current_year]]
    assert np.allclose(dist_taxes, diag_taxes)
def test_diagnostic_table(cps_subsample):
    """
    Check that diagnostic_table returns a pandas DataFrame when asked for
    three years of results.
    """
    calculator = Calculator(
        policy=Policy(),
        records=Records.cps_constructor(data=cps_subsample),
    )
    table = calculator.diagnostic_table(3)
    assert isinstance(table, pd.DataFrame)
def taxcalc_clp_results():
    """
    Use the locally installed taxcalc package to compute aggregate income
    tax and payroll tax revenues under current-law policy.

    The diagnostic table covers
    MAX_START_YEAR + NUMBER_OF_YEARS - MIN_START_YEAR years.
    NOTE(review): the table starts at the Calculator's initial year, which
    is presumed to be MIN_START_YEAR -- confirm.

    Returns a two-tuple of dictionaries indexed by calendar year:
    (income tax revenue, payroll tax revenue).
    """
    puf_records = Records(data=PUF_PATH)
    calculator = Calculator(policy=Policy(), records=puf_records)
    year_count = MAX_START_YEAR + NUMBER_OF_YEARS - MIN_START_YEAR
    # the diagnostic table is a pandas DataFrame with one column per year
    table = calculator.diagnostic_table(num_years=year_count)
    income_tax = table.xs('Ind inc tax ($b)').to_dict()
    payroll_tax = table.xs('Payroll tax ($b)').to_dict()
    return (income_tax, payroll_tax)
def test_with_pufcsv(puf_fullsample):
    """
    Compare the reform combined liability in the analysis year computed
    directly with a Calculator against the value returned by
    run_nth_year_taxcalc_model for the same user modifications.
    """
    # years involved in the comparison
    start_year = 2017
    analysis_year = 2026
    reform_year = start_year
    year_n = analysis_year - start_year
    # user modifications; key order is kept as-is because the seed asserted
    # below is derived from this dictionary
    usermods = {
        'policy': {reform_year: {'_FICA_ss_trt': [0.2]}},
        'consumption': {},
        'behavior': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
        'growmodel': {},
    }
    seed = random_seed(usermods)
    assert seed == 580419828
    # policy object that carries the reform parameters
    reform_policy = Policy()
    reform_policy.implement_reform(usermods['policy'])
    # records object holding all puf.csv input records
    puf_records = Records(data=puf_fullsample)
    # calculator stepped forward one year at a time to the analysis year
    calculator = Calculator(policy=reform_policy, records=puf_records)
    while calculator.current_year < analysis_year:
        calculator.increment_year()
    # one-year aggregate diagnostic table (pandas DataFrame)
    table = calculator.diagnostic_table(1)
    combined_row = table.loc["Combined Liability ($b)"]
    assert combined_row is not None
    fulls_reform_revenue = float(combined_row.loc[analysis_year])
    # same calculation through run_nth_year_taxcalc_model
    results = run_nth_year_taxcalc_model(year_n, start_year,
                                         use_puf_not_cps=True,
                                         use_full_sample=True,
                                         user_mods=usermods,
                                         return_dict=True)
    aggregates = results['aggr_2']
    # scale by 1e-9 to compare with the "($b)" diagnostic-table value
    tbi_reform_revenue = float(aggregates['combined_tax_9']) * 1e-9
    # the two revenues must agree to within one-hundredth of one percent
    abs_diff = abs(fulls_reform_revenue - tbi_reform_revenue)
    proportional_diff = abs_diff / fulls_reform_revenue
    report = 'f,d,adiff,pdiff= {:.4f} {:.4f} {:.4f} {}'.format(
        fulls_reform_revenue, tbi_reform_revenue, abs_diff,
        proportional_diff)
    print(report)
    assert proportional_diff < 0.0001
def test_with_pufcsv(puf_fullsample):
    """
    Compare the reform combined liability in the analysis year computed
    directly with a Calculator against the value returned by
    run_nth_year_taxcalc_model for the same user modifications.
    """
    # years involved in the comparison
    start_year = 2017
    analysis_year = 2026
    reform_year = start_year
    year_n = analysis_year - start_year
    # user modifications; key order is kept as-is because the seed asserted
    # below is derived from this dictionary
    usermods = {
        'policy': {reform_year: {'_FICA_ss_trt': [0.2]}},
        'consumption': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
    }
    seed = random_seed(usermods)
    assert seed == 2568216296
    # policy object that carries the reform parameters
    reform_policy = Policy()
    reform_policy.implement_reform(usermods['policy'])
    # records object holding all puf.csv input records
    puf_records = Records(data=puf_fullsample)
    # calculator stepped forward one year at a time to the analysis year
    calculator = Calculator(policy=reform_policy, records=puf_records)
    while calculator.current_year < analysis_year:
        calculator.increment_year()
    # one-year aggregate diagnostic table (pandas DataFrame)
    table = calculator.diagnostic_table(1)
    combined_row = table.loc["Combined Liability ($b)"]
    assert combined_row is not None
    fulls_reform_revenue = float(combined_row.loc[analysis_year])
    # same calculation through run_nth_year_taxcalc_model
    results = run_nth_year_taxcalc_model(year_n, start_year,
                                         use_puf_not_cps=True,
                                         use_full_sample=True,
                                         user_mods=usermods,
                                         return_dict=True)
    aggregates = results['aggr_2']
    tbi_reform_revenue = float(aggregates['combined_tax_9'])
    # the two revenues must agree to within one-hundredth of one percent
    abs_diff = abs(fulls_reform_revenue - tbi_reform_revenue)
    proportional_diff = abs_diff / fulls_reform_revenue
    report = 'f,d,adiff,pdiff= {:.4f} {:.4f} {:.4f} {}'.format(
        fulls_reform_revenue, tbi_reform_revenue, abs_diff,
        proportional_diff)
    print(report)
    assert proportional_diff < 0.0001
def test_agg():
    """
    Compare the ten-year current-law diagnostic table computed from the
    PUFCSV_PATH records with the expected text stored in AGGRES_PATH.

    On any difference the actual table is written next to the expected file,
    instructions are printed to stdout, and the test fails.
    """
    current_law = Policy()
    puf_records = Records(data=PUFCSV_PATH)
    calculator = Calculator(policy=current_law, records=puf_records)
    # aggregate diagnostic table (pandas DataFrame) rendered as text that
    # ends with exactly one EOL character
    table_text = calculator.diagnostic_table(num_years=10).to_string() + '\n'
    actual_lines = table_text.splitlines(True)
    with open(AGGRES_PATH, 'r') as expected_file:
        raw_expected = expected_file.read()
    # strip trailing whitespace so end-of-file noise is not reported
    expected_lines = (raw_expected.rstrip('\n\t ') + '\n').splitlines(True)
    diff_lines = list(difflib.unified_diff(expected_lines, actual_lines,
                                           fromfile='expected',
                                           tofile='actual', n=0))
    if diff_lines:
        # drops the last 10 characters of the path (presumably 'expect.txt'
        # -- confirm against the AGGRES_PATH definition)
        new_filename = '{}{}'.format(AGGRES_PATH[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(table_text)
        banner = (
            '*************************************************\n',
            '*** NEW RESULTS IN pufcsv_agg_actual.txt FILE ***\n',
            '*** if new OK, copy pufcsv_agg_actual.txt to ***\n',
            '*** pufcsv_agg_expect.txt ***\n',
            '*** and rerun test. ***\n',
            '*************************************************\n',
        )
        for banner_line in banner:
            sys.stdout.write(banner_line)
        assert False
def taxcalc_results(start_year, reform_dict, itax_clp, fica_clp):
    """
    Use the locally installed taxcalc package to compute the aggregate
    income tax and payroll tax revenue differences between a reform and
    current-law policy.

    start_year: first calendar year of the NUMBER_OF_YEARS-year window.
    reform_dict: reform passed to Policy.implement_reform.
    itax_clp, fica_clp: current-law income tax and payroll tax revenues
    indexed by calendar year.

    Returns a two-tuple of dictionaries indexed by calendar year, each
    holding reform-minus-current-law revenue rounded to one decimal place:
    (income tax difference, payroll tax difference).
    """
    reform_policy = Policy()
    reform_policy.implement_reform(reform_dict)
    calculator = Calculator(policy=reform_policy,
                            records=Records(data=PUF_PATH))
    calculator.advance_to_year(start_year)
    # the diagnostic table is a pandas DataFrame with one column per year
    table = calculator.diagnostic_table(num_years=NUMBER_OF_YEARS)
    itax_reform = table.xs('Ind inc tax ($b)').to_dict()
    fica_reform = table.xs('Payroll tax ($b)').to_dict()
    # both result dictionaries are keyed by the years in the income-tax row
    itax_diff = {yr: round(itax_reform[yr] - itax_clp[yr], 1)
                 for yr in itax_reform}
    fica_diff = {yr: round(fica_reform[yr] - fica_clp[yr], 1)
                 for yr in itax_reform}
    return (itax_diff, fica_diff)
def test_dist_table_sum_row(cps_subsample):
    """
    Check that the ALL row of the distribution tables agrees across row
    groupings and with the one-year diagnostic table.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    calculator.calc_all()
    # build four distribution tables that differ only in their row grouping
    std_tbl, _ = calculator.distribution_tables(None, 'standard_income_bins')
    soi_tbl, _ = calculator.distribution_tables(None, 'soi_agi_bins')
    dec_tbl, _ = calculator.distribution_tables(None, 'weighted_deciles')
    pop_tbl, _ = calculator.distribution_tables(None, 'weighted_deciles',
                                                pop_quantiles=True)
    # the ALL row must be the same whatever the grouping
    assert np.allclose(std_tbl.loc['ALL'], soi_tbl.loc['ALL'])
    assert np.allclose(std_tbl.loc['ALL'], dec_tbl.loc['ALL'])
    # population count must exceed filing-unit count
    assert pop_tbl.at['ALL', 'count'] > std_tbl.at['ALL', 'count']
    # overwrite the count columns, then require the rest of the ALL row of
    # the population table to match the filing-unit table
    count_columns = ('count', 'count_StandardDed', 'count_ItemDed',
                     'count_AMT')
    for column in count_columns:
        pop_tbl.at['ALL', column] = std_tbl.at['ALL', column]
    assert np.allclose(std_tbl.loc['ALL'], pop_tbl.loc['ALL'])
    # ALL-row tax liabilities must match the diagnostic table entries
    diag_tbl = calculator.diagnostic_table(1)
    dist_taxes = [pop_tbl.at['ALL', 'iitax'],
                  pop_tbl.at['ALL', 'payrolltax']]
    diag_taxes = [diag_tbl.at['Ind Income Tax ($b)', calculator.current_year],
                  diag_tbl.at['Payroll Taxes ($b)', calculator.current_year]]
    assert np.allclose(dist_taxes, diag_taxes)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.

    First compares the ten-year diagnostic table computed from the full
    sample with tests_path/cpscsv_agg_expect.txt; on a nonsmall difference
    the actual table is written to cpscsv_agg_actual.txt in the same folder
    and ValueError is raised.

    Then recomputes the table from a 3-percent sub-sample and raises
    ValueError if the combined liability of any year differs from the
    full-sample value by more than the relative tolerance for that year.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing current law policy parameters
    baseline_policy = Policy()
    # create a Records object (recs) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True))
    if diffs:
        # dropping the last 10 characters removes 'expect.txt' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
        msg += '--- cpscsv_agg_expect.txt ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        # the first year is given a looser tolerance than later years
        reltol = 0.014 if cyr == calc_start_year else 0.006
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.

    First compares the ten-year diagnostic table (rounded to one decimal and
    starting at START_YEAR) with tests_path/cpscsv_agg_expect.csv column by
    column; on a difference the actual table is written to
    cpscsv_agg_actual.csv in the same folder and ValueError is raised.

    Then recomputes the table from a 3-percent sub-sample and raises
    ValueError if the combined liability of any year differs from the
    full-sample value by more than the relative tolerance for that year.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object with current-law policy parameters
    baseline_policy = Policy()
    # create a Records object (recs) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc.advance_to_year(START_YEAR)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs).round(1)  # column labels are int
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # compare actual DataFrame, adt, with the expected DataFrame, edt
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.csv')
    edt = pd.read_csv(aggres_path, index_col=False)  # column labels are str
    edt.drop('Unnamed: 0', axis='columns', inplace=True)
    assert len(adt.columns.values) == len(edt.columns.values)
    diffs = False
    for icol in adt.columns.values:
        # adt is keyed by int year, edt by the same year as a string
        if not np.allclose(adt[icol], edt[str(icol)]):
            diffs = True
    if diffs:
        # dropping the last 10 characters removes 'expect.csv' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.csv')
        adt.to_csv(new_filename, float_format='%.1f')
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.csv FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.csv to ---\n'
        msg += '--- cpscsv_agg_expect.csv ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    calc_subsample.advance_to_year(START_YEAR)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        # the first year is given a looser tolerance than later years
        reltol = 0.0141 if cyr == calc_start_year else 0.0105
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.

    First compares the ten-year diagnostic table (rounded to one decimal and
    starting at START_YEAR) with tests_path/cpscsv_agg_expect.csv column by
    column; on a difference the actual table is written to
    cpscsv_agg_actual.csv in the same folder and ValueError is raised.

    Then recomputes the table from a 3-percent sub-sample and raises
    ValueError if the combined liability of any year differs from the
    full-sample value by more than the relative tolerance for that year.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object with current-law policy parameters
    baseline_policy = Policy()
    # create a Records object (recs) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc.advance_to_year(START_YEAR)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs).round(1)  # column labels are int
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # compare actual DataFrame, adt, with the expected DataFrame, edt
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.csv')
    edt = pd.read_csv(aggres_path, index_col=False)  # column labels are str
    edt.drop('Unnamed: 0', axis='columns', inplace=True)
    assert len(adt.columns.values) == len(edt.columns.values)
    diffs = False
    for icol in adt.columns.values:
        # adt is keyed by int year, edt by the same year as a string
        if not np.allclose(adt[icol], edt[str(icol)]):
            diffs = True
    if diffs:
        # dropping the last 10 characters removes 'expect.csv' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.csv')
        adt.to_csv(new_filename, float_format='%.1f')
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.csv FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.csv to ---\n'
        msg += '--- cpscsv_agg_expect.csv ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '--- (both are in taxcalc/tests) ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    calc_subsample.advance_to_year(START_YEAR)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        # the first year is given a looser tolerance than later years
        reltol = 0.0141 if cyr == calc_start_year else 0.0105
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv

    First compares the text of the ten-year full-sample diagnostic table
    with tests_path/pufcsv_agg_expect.txt; on any difference the actual
    table is written to pufcsv_agg_actual.txt in the same folder and
    ValueError is raised.

    Then recomputes the table from a 5-percent sub-sample and raises
    ValueError, listing the offending years, if the combined liability
    differs from the full-sample value by more than one percent.
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # generate differences between actual and expected results
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expected = expected_results.splitlines(True)
    # materialize the diff generator so it can be tested for emptiness
    diff_lines = list(difflib.unified_diff(expected, actual,
                                           fromfile='expected',
                                           tofile='actual', n=0))
    # test failure if there are any diff_lines
    if diff_lines:
        # dropping the last 10 characters removes 'expect.txt' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
        msg += '--- pufcsv_agg_expect.txt ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                                    reltol,
                                                                    rn_seed)
        msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
        # walk both rows in step; position 0 corresponds to calc_start_year
        yearly_pairs = zip(taxes_subsample, taxes_fullsample)
        for cyr, (sub_val, all_val) in enumerate(yearly_pairs,
                                                 calc_start_year):
            tax_sub = float(sub_val)
            tax_all = float(all_val)
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
        raise ValueError(msg)
def test_agg(tests_path):
    """
    Test current-law aggregate taxes using cps.csv file.

    The baseline policy is a Policy object with the 'policy' part of
    reforms/2017_law.json implemented on it.

    First compares the text of the ten-year full-sample diagnostic table
    with tests_path/cpscsv_agg_expect.txt; on a nonsmall difference the
    actual table is written to cpscsv_agg_actual.txt in the same folder and
    ValueError is raised.

    Then recomputes the table from a 3-percent sub-sample of cps.csv.gz and
    raises ValueError, listing the offending years, if the combined
    liability of any year after the first differs from the full-sample
    value by more than one percent.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    rec = Records.cps_constructor()
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True), small)
    if diffs:
        # dropping the last 10 characters removes 'expect.txt' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
        msg += '--- cpscsv_agg_expect.txt ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    cps_filepath = os.path.join(tests_path, '..', 'cps.csv.gz')
    fullsample = pd.read_csv(cps_filepath)
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample,
                            gfactors=Growfactors(),
                            weights=Records.CPS_WEIGHTS_FILENAME,
                            adjust_ratios=Records.CPS_RATIOS_FILENAME,
                            start_year=Records.CPSCSV_YEAR)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    # TODO: skip first year because of BUG in cps_weights.csv file
    skipped_years = 1  # TODO: eliminate code
    # .iloc makes the skip positional regardless of the integer year labels
    taxes_subsample = taxes_subsample.iloc[skipped_years:]
    taxes_fullsample = taxes_fullsample.iloc[skipped_years:]
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'CPSCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            subfrac, reltol, rn_seed)
        msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
        # position 0 of the sliced rows is the year after the skipped ones,
        # so year labels start at calc_start_year + skipped_years
        first_compared_year = calc_start_year + skipped_years
        yearly_pairs = zip(taxes_subsample, taxes_fullsample)
        for cyr, (sub_val, all_val) in enumerate(yearly_pairs,
                                                 first_compared_year):
            tax_sub = float(sub_val)
            tax_all = float(all_val)
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
        raise ValueError(msg)
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv

    The baseline policy is a Policy object with the 'policy' part of
    reforms/2017_law.json implemented on it.

    First compares the text of the ten-year full-sample diagnostic table
    with tests_path/pufcsv_agg_expect.txt; on a nonsmall difference the
    actual table is written to pufcsv_agg_actual.txt in the same folder and
    ValueError is raised.

    Then recomputes the table from a 5-percent sub-sample and raises
    ValueError if the combined liability of any year differs from the
    full-sample value by more than one percent.
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (recs) containing all puf.csv input records
    recs = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string(float_format='%8.1f') + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    diffs = nonsmall_diffs(actual, expect)
    if diffs:
        # dropping the last 10 characters removes 'expect.txt' from the path
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
        msg += '--- pufcsv_agg_expect.txt ---\n'
        # literal kept on one physical line (a raw line break inside it is a
        # syntax error)
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 2222  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_diagnostic_table(cps_subsample):
    """
    Check that diagnostic_table returns a pandas DataFrame when asked for
    three years of results, using CPS records built with no_benefits=True.
    """
    calculator = Calculator(
        policy=Policy(),
        records=Records.cps_constructor(data=cps_subsample,
                                        no_benefits=True),
    )
    table = calculator.diagnostic_table(3)
    assert isinstance(table, pd.DataFrame)
def test_agg(tests_path, puf_fullsample):
    """
    Test aggregate taxes under the baseline policy in two steps:
    (1) the full-sample puf.csv diagnostic table must match the contents
    of pufcsv_agg_expect.txt apart from small differences, and
    (2) a reproducible sub-sample of puf.csv must give a combined tax
    liability close to the full-sample liability in each year.
    """
    # pylint: disable=too-many-locals,too-many-statements
    years = 10
    # build the baseline policy from the 2017_law.json parameters
    law_file = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    law_params = Calculator.read_json_param_objects(law_file, None)
    policy = Policy()
    policy.implement_reform(law_params['policy'])
    # step (1): full-sample diagnostic table versus expected results
    calc_full = Calculator(policy=policy,
                           records=Records(data=puf_fullsample))
    start = calc_full.current_year
    table_full = calc_full.diagnostic_table(years)
    taxes_fullsample = table_full.loc["Combined Liability ($b)"]
    # actual results text has a trailing EOL character
    actual_txt = table_full.to_string(float_format='%8.1f') + '\n'
    actual_lines = actual_txt.splitlines(True)
    expect_file_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_file_path, 'r') as efile:
        # cleanup end of file text so it ends with a single EOL character
        expect_txt = efile.read().rstrip('\n\t ') + '\n'
    expect_lines = expect_txt.splitlines(True)
    differences = nonsmall_diffs(actual_lines, expect_lines)
    if differences:
        # write the new results next to the expected-results file
        prefix = expect_file_path[:-10]  # path without 'expect.txt'
        with open('{}{}'.format(prefix, 'actual.txt'), 'w') as afile:
            afile.write(actual_txt)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            '--- pufcsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # step (2): unweighted sub-sample versus full sample
    random_seed = 2222  # keeps the sub-sample the same on every run
    fraction = 0.05  # share of records drawn into the sub-sample
    drawn = puf_fullsample.sample(frac=fraction, random_state=random_seed)
    calc_sub = Calculator(policy=policy, records=Records(data=drawn))
    table_sub = calc_sub.diagnostic_table(years)
    taxes_subsample = table_sub.loc["Combined Liability ($b)"]
    # one template holds the three report lines for a differing year
    template = (
        '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
        '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'
        '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
    )
    tolerance = 0.01  # maximum allowed relative difference in liability
    failure_text = ''
    for year in range(start, start + years):
        sub_amt = taxes_subsample[year]
        full_amt = taxes_fullsample[year]
        close = np.allclose(sub_amt, full_amt, atol=0.0, rtol=tolerance)
        if not close:
            failure_text += template.format(
                year,
                fraction, tolerance, random_seed,
                sub_amt, full_amt, (sub_amt / full_amt) - 1.,
            )
    if failure_text:
        raise ValueError(failure_text)
def test_agg(tests_path, cps_fullsample):
    """
    Check aggregate taxes computed from the cps.csv file against the
    cpscsv_agg_expect.txt file, and then check that a fixed random
    sub-sample of cps.csv gives combined tax liabilities close to the
    full-sample amounts in every year.
    """
    # pylint: disable=too-many-statements,too-many-locals
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline policy: default Policy with 2017_law.json parameters applied
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    param_objs = Calculator.read_json_param_objects(reform_path, None)
    base_pol = Policy()
    base_pol.implement_reform(param_objs['policy'])
    # full-sample calculator built from all the cps.csv input records
    full_recs = Records.cps_constructor(data=cps_fullsample)
    full_calc = Calculator(policy=base_pol, records=full_recs)
    # remember the starting year before producing the diagnostic table
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    # actual results as text that ends with a trailing EOL character
    actual_text = full_table.to_string(float_format='%8.1f') + '\n'
    # expected results, with the end of the file text cleaned up
    expect_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        raw_expect = expect_file.read()
    expect_text = raw_expect.rstrip('\n\t ') + '\n'
    # actual and expected results must have no nonsmall differences
    if nonsmall_diffs(actual_text.splitlines(True),
                      expect_text.splitlines(True)):
        # swap the trailing 'expect.txt' (10 characters) for 'actual.txt'
        actual_path = '{}{}'.format(expect_path[:-10], 'actual.txt')
        with open(actual_path, 'w') as actual_file:
            actual_file.write(actual_text)
        rule = '-------------------------------------------------\n'
        raise ValueError(''.join([
            'CPSCSV AGG RESULTS DIFFER\n',
            rule,
            '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n',
            '--- if new OK, copy cpscsv_agg_actual.txt to ---\n',
            '--- cpscsv_agg_expect.txt ---\n',
            '--- and rerun test. ---\n',
            rule,
        ]))
    # diagnostic table for an unweighted sub-sample of the records
    seed = 180  # fixed so that the sub-sample is always the same
    frac = 0.03  # sub-sample fraction
    sub_data = cps_fullsample.sample(frac=frac, random_state=seed)
    sub_recs = Records.cps_constructor(data=sub_data)
    sub_calc = Calculator(policy=base_pol, records=sub_recs)
    sub_table = sub_calc.diagnostic_table(num_years)
    taxes_subsample = sub_table.loc[liability_row]
    # collect a report for each year where sub and full samples disagree
    reports = []
    for cyr in range(first_year, first_year + num_years):
        # the first year is allowed a looser tolerance than later years
        reltol = 0.014 if cyr == first_year else 0.006
        sub_tax = taxes_subsample[cyr]
        full_tax = taxes_fullsample[cyr]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=reltol):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(cyr)
        )
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                frac, reltol, seed)
        )
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff)
        )
    if reports:
        raise ValueError(''.join(reports))
def test_Calculator_diagnostic_table():
    """
    Smoke test: calling diagnostic_table() with no arguments on a
    Calculator built from TAXDATA and WEIGHTS must not raise.
    """
    default_policy = Policy()
    puf_records = Records(
        data=TAXDATA,
        weights=WEIGHTS,
        start_year=Records.PUF_YEAR,
    )
    calculator = Calculator(policy=default_policy, records=puf_records)
    calculator.diagnostic_table()
def test_diagnostic_table():
    """
    Smoke test: calling diagnostic_table() on a Calculator whose Records
    object is built from TAX_DTA with shifted flpdyr values must not raise.

    The flpdyr shift is applied to the module-level TAX_DTA object, so it
    is undone in a finally clause; otherwise every test that runs after
    this one (or after a failure in this one) would see the shifted data.
    """
    policy = Policy()
    TAX_DTA.flpdyr += 18  # flpdyr==2009 so that Records ctor will apply blowup
    try:
        puf = Records(data=TAX_DTA, weights=WEIGHTS)
        calc = Calculator(policy=policy, records=puf)
        calc.diagnostic_table()
    finally:
        TAX_DTA.flpdyr -= 18  # restore shared data for subsequent tests
def test_agg(tests_path, puf_fullsample):
    """
    Check aggregate tax results from the full puf.csv sample against the
    pufcsv_agg_expect.txt file (using a Python-version-dependent notion of
    a small difference), and then check that a fixed random sub-sample of
    puf.csv gives combined tax liabilities that are within a relative
    tolerance of the full-sample amounts.
    """
    # pylint: disable=too-many-locals,too-many-statements
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline policy: default Policy with 2017_law.json parameters applied
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    param_objs = Calculator.read_json_param_objects(reform_path, None)
    base_pol = Policy()
    base_pol.implement_reform(param_objs['policy'])
    # full-sample calculator built from all the puf.csv input records
    full_recs = Records(data=puf_fullsample)
    full_calc = Calculator(policy=base_pol, records=full_recs)
    # remember the starting year before producing the diagnostic table
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    # render the table as text that ends with a trailing EOL character
    table_text = full_table.to_string() + '\n'
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        raw_expect = expect_file.read()
    # drop any trailing whitespace in the expected file, then add one EOL
    expect_text = raw_expect.rstrip('\n\t ') + '\n'
    # tighter test for Python 2.7 and looser test for Python 3.6
    small = 0.0 if sys.version_info.major == 2 else 0.1
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True), small):
        # swap the trailing 'expect.txt' (10 characters) for 'actual.txt'
        actual_path = '{}{}'.format(expect_path[:-10], 'actual.txt')
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        rule = '-------------------------------------------------\n'
        raise ValueError(''.join([
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n',
            rule,
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n',
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n',
            '--- pufcsv_agg_expect.txt ---\n',
            '--- and rerun test. ---\n',
            rule,
        ]))
    # diagnostic table for an unweighted sub-sample of the records
    seed = 180  # fixed so that the sub-sample is always the same
    frac = 0.05  # sub-sample fraction
    sub_data = puf_fullsample.sample(frac=frac, random_state=seed)
    sub_recs = Records(data=sub_data)
    sub_calc = Calculator(policy=base_pol, records=sub_recs)
    sub_table = sub_calc.diagnostic_table(num_years)
    taxes_subsample = sub_table.loc[liability_row]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # the samples disagree: list each year whose difference is too large
    pieces = [
        'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n',
        'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            frac, reltol, seed),
    ]
    row_fmt = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    paired = zip(taxes_subsample, taxes_fullsample)
    for offset, (sub_val, full_val) in enumerate(paired):
        tax_sub = float(sub_val)
        tax_all = float(full_val)
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            pieces.append(
                row_fmt.format(offset + first_year,
                               tax_sub, tax_all, reldiff)
            )
    raise ValueError(''.join(pieces))
def test_Calculator_diagnostic_table():
    """
    Smoke test: calling diagnostic_table() with no arguments on a
    Calculator built from TAX_DTA and WEIGHTS must not raise.
    """
    default_policy = Policy()
    puf_records = Records(
        data=TAX_DTA,
        weights=WEIGHTS,
        start_year=Records.PUF_YEAR,
    )
    calculator = Calculator(policy=default_policy, records=puf_records)
    calculator.diagnostic_table()