def test_Calculator_diagnostic_table():
    puf = Records(data=TAXDATA, weights=WEIGHTS, start_year=Records.PUF_YEAR)
    beh = Behavior()
    beh.update_behavior({2013: {'_BE_sub': [0.4]}})
    assert beh.has_response()
    calc = Calculator(policy=Policy(), records=puf, behavior=beh)
    calc.diagnostic_table(base_calc=calc)
Example #2
0
def test_Calculator_diagnostic_table_no_mutation():
    policy_x = Policy()
    record_x = Records(data=TAX_DTA, weights=WEIGHTS,
                       start_year=Records.PUF_YEAR)
    policy_y = Policy()
    record_y = Records(data=TAX_DTA, weights=WEIGHTS,
                       start_year=Records.PUF_YEAR)
    calc_x = Calculator(policy=policy_x, records=record_x)
    calc_y = Calculator(policy=policy_y, records=record_y)
    x_start = calc_x.current_year
    y_start = calc_y.current_year
    calc_y.diagnostic_table(base_calc=calc_x)
    assert calc_y.current_year == y_start
    assert calc_x.current_year == x_start
Example #3
0
def test_Calculator_diagnostic_table_no_mutation():
    policy_x = Policy()
    record_x = Records(data=TAXDATA,
                       weights=WEIGHTS,
                       start_year=Records.PUF_YEAR)
    policy_y = Policy()
    record_y = Records(data=TAXDATA,
                       weights=WEIGHTS,
                       start_year=Records.PUF_YEAR)
    calc_x = Calculator(policy=policy_x, records=record_x)
    calc_y = Calculator(policy=policy_y, records=record_y)
    x_start = calc_x.current_year
    y_start = calc_y.current_year
    calc_y.diagnostic_table(base_calc=calc_x)
    assert calc_y.current_year == y_start
    assert calc_x.current_year == x_start
Example #4
0
def test_dist_table_sum_row(cps_subsample):
    rec = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=rec)
    calc.calc_all()
    # create three distribution tables and compare the ALL row contents
    tb1, _ = calc.distribution_tables(None, 'standard_income_bins')
    tb2, _ = calc.distribution_tables(None, 'soi_agi_bins')
    tb3, _ = calc.distribution_tables(None, 'weighted_deciles')
    tb4, _ = calc.distribution_tables(None,
                                      'weighted_deciles',
                                      pop_quantiles=True)
    assert np.allclose(tb1.loc['ALL'], tb2.loc['ALL'])
    assert np.allclose(tb1.loc['ALL'], tb3.loc['ALL'])
    # make sure population count is larger than filing-unit count
    assert tb4.at['ALL', 'count'] > tb1.at['ALL', 'count']
    # make sure population table has same ALL row values as filing-unit table
    for col in ['count', 'count_StandardDed', 'count_ItemDed', 'count_AMT']:
        tb4.at['ALL', col] = tb1.at['ALL', col]
    assert np.allclose(tb1.loc['ALL'], tb4.loc['ALL'])
    # make sure population table has same ALL tax liabilities as diagnostic tbl
    dgt = calc.diagnostic_table(1)
    assert np.allclose([tb4.at['ALL', 'iitax'], tb4.at['ALL', 'payrolltax']], [
        dgt.at['Ind Income Tax ($b)', calc.current_year],
        dgt.at['Payroll Taxes ($b)', calc.current_year]
    ])
def test_diagnostic_table(cps_subsample):
    """
    Test diagnostic_table method.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    adt = calc.diagnostic_table(3)
    assert isinstance(adt, pd.DataFrame)
def test_diagnostic_table(cps_subsample):
    """
    Test diagnostic_table method.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    adt = calc.diagnostic_table(3)
    assert isinstance(adt, pd.DataFrame)
Example #7
0
def taxcalc_clp_results():
    """
    Use taxcalc package on this computer to compute aggregate income tax
    and payroll tax revenues for years beginning with MIN_START_YEAR and
    ending with MAX_START_YEAR+NUMBER_OF_YEARS-1 for current-law policy.
    Return two aggregate revenue dictionaries indexed by calendar year.
    """
    calc = Calculator(policy=Policy(), records=Records(data=PUF_PATH))
    nyrs = MAX_START_YEAR + NUMBER_OF_YEARS - MIN_START_YEAR
    adt = calc.diagnostic_table(num_years=nyrs)
    # note that adt is Pandas DataFrame object
    return (adt.xs('Ind inc tax ($b)').to_dict(),
            adt.xs('Payroll tax ($b)').to_dict())
Example #8
0
def test_with_pufcsv(puf_fullsample):
    # specify usermods dictionary in code
    start_year = 2017
    reform_year = start_year
    analysis_year = 2026
    year_n = analysis_year - start_year
    reform = {
        '_FICA_ss_trt': [0.2]
    }
    usermods = dict()
    usermods['policy'] = {reform_year: reform}
    usermods['consumption'] = {}
    usermods['behavior'] = {}
    usermods['growdiff_baseline'] = {}
    usermods['growdiff_response'] = {}
    usermods['growmodel'] = {}
    seed = random_seed(usermods)
    assert seed == 580419828
    # create a Policy object (pol) containing reform policy parameters
    pol = Policy()
    pol.implement_reform(usermods['policy'])
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=pol, records=rec)
    while calc.current_year < analysis_year:
        calc.increment_year()
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(1)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None
    fulls_reform_revenue = float(taxes_fullsample.loc[analysis_year])
    # call run_nth_year_tax_calc_model function
    resdict = run_nth_year_taxcalc_model(year_n, start_year,
                                         use_puf_not_cps=True,
                                         use_full_sample=True,
                                         user_mods=usermods,
                                         return_dict=True)
    total = resdict['aggr_2']
    tbi_reform_revenue = float(total['combined_tax_9']) * 1e-9
    # assert that tbi revenue is similar to the fullsample calculation
    diff = abs(fulls_reform_revenue - tbi_reform_revenue)
    proportional_diff = diff / fulls_reform_revenue
    frmt = 'f,d,adiff,pdiff=  {:.4f}  {:.4f}  {:.4f}  {}'
    print(frmt.format(fulls_reform_revenue, tbi_reform_revenue,
                      diff, proportional_diff))
    assert proportional_diff < 0.0001  # one-hundredth of one percent
Example #9
0
def test_with_pufcsv(puf_fullsample):
    # specify usermods dictionary in code
    start_year = 2017
    reform_year = start_year
    analysis_year = 2026
    year_n = analysis_year - start_year
    reform = {
        '_FICA_ss_trt': [0.2]
    }
    usermods = dict()
    usermods['policy'] = {reform_year: reform}
    usermods['consumption'] = {}
    usermods['growdiff_baseline'] = {}
    usermods['growdiff_response'] = {}
    seed = random_seed(usermods)
    assert seed == 2568216296
    # create a Policy object (pol) containing reform policy parameters
    pol = Policy()
    pol.implement_reform(usermods['policy'])
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=pol, records=rec)
    while calc.current_year < analysis_year:
        calc.increment_year()
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(1)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None
    fulls_reform_revenue = float(taxes_fullsample.loc[analysis_year])
    # call run_nth_year_tax_calc_model function
    resdict = run_nth_year_taxcalc_model(year_n, start_year,
                                         use_puf_not_cps=True,
                                         use_full_sample=True,
                                         user_mods=usermods,
                                         return_dict=True)
    total = resdict['aggr_2']
    tbi_reform_revenue = float(total['combined_tax_9'])
    # assert that tbi revenue is similar to the fullsample calculation
    diff = abs(fulls_reform_revenue - tbi_reform_revenue)
    proportional_diff = diff / fulls_reform_revenue
    frmt = 'f,d,adiff,pdiff=  {:.4f}  {:.4f}  {:.4f}  {}'
    print(frmt.format(fulls_reform_revenue, tbi_reform_revenue,
                      diff, proportional_diff))
    assert proportional_diff < 0.0001  # one-hundredth of one percent
Example #10
0
def test_agg():
    """
    Test Tax-Calculator aggregate taxes with no policy reform using puf.csv
    """
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    # create a Records object (puf) containing puf.csv input records
    puf = Records(data=PUFCSV_PATH)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(num_years=10)
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # generate differences between actual and expected results
    actual = adtstr.splitlines(True)
    with open(AGGRES_PATH, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expected = expected_results.splitlines(True)
    diff = difflib.unified_diff(expected,
                                actual,
                                fromfile='expected',
                                tofile='actual',
                                n=0)
    # convert diff generator into a list of lines:
    diff_lines = list()
    for line in diff:
        diff_lines.append(line)
    # test failure if there are any diff_lines
    if len(diff_lines) > 0:
        new_filename = '{}{}'.format(AGGRES_PATH[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        sys.stdout.write('*************************************************\n')
        sys.stdout.write('*** NEW RESULTS IN pufcsv_agg_actual.txt FILE ***\n')
        sys.stdout.write('*** if new OK, copy pufcsv_agg_actual.txt to  ***\n')
        sys.stdout.write('***                 pufcsv_agg_expect.txt     ***\n')
        sys.stdout.write('***            and rerun test.                ***\n')
        sys.stdout.write('*************************************************\n')
        assert False
def test_agg():
    """
    Test Tax-Calculator aggregate taxes with no policy reform using puf.csv
    """
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    # create a Records object (puf) containing puf.csv input records
    puf = Records(data=PUFCSV_PATH)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(num_years=10)
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # generate differences between actual and expected results
    actual = adtstr.splitlines(True)
    with open(AGGRES_PATH, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expected = expected_results.splitlines(True)
    diff = difflib.unified_diff(expected, actual,
                                fromfile='expected', tofile='actual', n=0)
    # convert diff generator into a list of lines:
    diff_lines = list()
    for line in diff:
        diff_lines.append(line)
    # test failure if there are any diff_lines
    if len(diff_lines) > 0:
        new_filename = '{}{}'.format(AGGRES_PATH[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        sys.stdout.write('*************************************************\n')
        sys.stdout.write('*** NEW RESULTS IN pufcsv_agg_actual.txt FILE ***\n')
        sys.stdout.write('*** if new OK, copy pufcsv_agg_actual.txt to  ***\n')
        sys.stdout.write('***                 pufcsv_agg_expect.txt     ***\n')
        sys.stdout.write('***            and rerun test.                ***\n')
        sys.stdout.write('*************************************************\n')
        assert False
Example #12
0
def taxcalc_results(start_year, reform_dict, itax_clp, fica_clp):
    """
    Use taxcalc package on this computer to compute aggregate income tax and
    payroll tax revenue difference (between reform and current-law policy)
    for ten years beginning with the specified start_year using the specified
    reform_dict dictionary and the two specified current-law-policy results
    dictionaries.
    Return two aggregate tax revenue difference dictionaries indexed by
    calendar year.
    """
    pol = Policy()
    pol.implement_reform(reform_dict)
    calc = Calculator(policy=pol, records=Records(data=PUF_PATH))
    calc.advance_to_year(start_year)
    adt = calc.diagnostic_table(num_years=NUMBER_OF_YEARS)
    # note that adt is Pandas DataFrame object
    itax_ref = adt.xs('Ind inc tax ($b)').to_dict()
    fica_ref = adt.xs('Payroll tax ($b)').to_dict()
    itax_diff = {}
    fica_diff = {}
    for year in itax_ref:
        itax_diff[year] = round(itax_ref[year] - itax_clp[year], 1)
        fica_diff[year] = round(fica_ref[year] - fica_clp[year], 1)
    return (itax_diff, fica_diff)
def test_dist_table_sum_row(cps_subsample):
    rec = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=rec)
    calc.calc_all()
    # create three distribution tables and compare the ALL row contents
    tb1, _ = calc.distribution_tables(None, 'standard_income_bins')
    tb2, _ = calc.distribution_tables(None, 'soi_agi_bins')
    tb3, _ = calc.distribution_tables(None, 'weighted_deciles')
    tb4, _ = calc.distribution_tables(None, 'weighted_deciles',
                                      pop_quantiles=True)
    assert np.allclose(tb1.loc['ALL'], tb2.loc['ALL'])
    assert np.allclose(tb1.loc['ALL'], tb3.loc['ALL'])
    # make sure population count is larger than filing-unit count
    assert tb4.at['ALL', 'count'] > tb1.at['ALL', 'count']
    # make sure population table has same ALL row values as filing-unit table
    for col in ['count', 'count_StandardDed', 'count_ItemDed', 'count_AMT']:
        tb4.at['ALL', col] = tb1.at['ALL', col]
    assert np.allclose(tb1.loc['ALL'], tb4.loc['ALL'])
    # make sure population table has same ALL tax liabilities as diagnostic tbl
    dgt = calc.diagnostic_table(1)
    assert np.allclose([tb4.at['ALL', 'iitax'],
                        tb4.at['ALL', 'payrolltax']],
                       [dgt.at['Ind Income Tax ($b)', calc.current_year],
                        dgt.at['Payroll Taxes ($b)', calc.current_year]])
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing current law policy parameters
    baseline_policy = Policy()
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True))
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.014
        else:
            reltol = 0.006
        if not np.allclose(
                taxes_subsample[cyr], taxes_fullsample[cyr], atol=0.0,
                rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object with current-law policy parameters
    baseline_policy = Policy()
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc.advance_to_year(START_YEAR)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs).round(1)  # column labels are int
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # compare actual DataFrame, adt, with the expected DataFrame, edt
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.csv')
    edt = pd.read_csv(aggres_path, index_col=False)  # column labels are str
    edt.drop('Unnamed: 0', axis='columns', inplace=True)
    assert len(adt.columns.values) == len(edt.columns.values)
    diffs = False
    for icol in adt.columns.values:
        if not np.allclose(adt[icol], edt[str(icol)]):
            diffs = True
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.csv')
        adt.to_csv(new_filename, float_format='%.1f')
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.csv FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.csv to  ---\n'
        msg += '---                 cpscsv_agg_expect.csv     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    calc_subsample.advance_to_year(START_YEAR)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.0141
        else:
            reltol = 0.0105
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object with current-law policy parameters
    baseline_policy = Policy()
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc.advance_to_year(START_YEAR)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs).round(1)  # column labels are int
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # compare actual DataFrame, adt, with the expected DataFrame, edt
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.csv')
    edt = pd.read_csv(aggres_path, index_col=False)  # column labels are str
    edt.drop('Unnamed: 0', axis='columns', inplace=True)
    assert len(adt.columns.values) == len(edt.columns.values)
    diffs = False
    for icol in adt.columns.values:
        if not np.allclose(adt[icol], edt[str(icol)]):
            diffs = True
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.csv')
        adt.to_csv(new_filename, float_format='%.1f')
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.csv FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.csv to  ---\n'
        msg += '---                 cpscsv_agg_expect.csv     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '---       (both are in taxcalc/tests)         ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    calc_subsample.advance_to_year(START_YEAR)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.0141
        else:
            reltol = 0.0105
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Example #17
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # generate differences between actual and expected results
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expected = expected_results.splitlines(True)
    diff = difflib.unified_diff(expected, actual,
                                fromfile='expected', tofile='actual', n=0)
    # convert diff generator into a list of lines:
    diff_lines = list()
    for line in diff:
        diff_lines.append(line)
    # test failure if there are any diff_lines
    if diff_lines:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                                    reltol,
                                                                    rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            cyr = it_sub.index + calc_start_year
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
Example #18
0
def test_agg(tests_path):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    rec = Records.cps_constructor()
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True), small)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    cps_filepath = os.path.join(tests_path, '..', 'cps.csv.gz')
    fullsample = pd.read_csv(cps_filepath)
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample,
                            gfactors=Growfactors(),
                            weights=Records.CPS_WEIGHTS_FILENAME,
                            adjust_ratios=Records.CPS_RATIOS_FILENAME,
                            start_year=Records.CPSCSV_YEAR)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    # TODO: skip first year because of BUG in cps_weights.csv file
    taxes_subsample = taxes_subsample[1:]  # TODO: eliminate code
    taxes_fullsample = taxes_fullsample[1:]  # TODO: eliminate code
    if not np.allclose(
            taxes_subsample, taxes_fullsample, atol=0.0, rtol=reltol):
        msg = 'CPSCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            subfrac, reltol, rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            cyr = it_sub.index + calc_start_year
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
Example #19
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    recs = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string(float_format='%8.1f') + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    diffs = nonsmall_diffs(actual, expect)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 2222  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        reltol = 0.01  # maximum allowed relative difference in tax liability
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Example #20
0
def test_diagnostic_table(cps_subsample):
    recs = Records.cps_constructor(data=cps_subsample, no_benefits=True)
    calc = Calculator(policy=Policy(), records=recs)
    adt = calc.diagnostic_table(3)
    assert isinstance(adt, pd.DataFrame)
Example #21
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    recs = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string(float_format='%8.1f') + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    diffs = nonsmall_diffs(actual, expect)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 2222  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        reltol = 0.01  # maximum allowed relative difference in tax liability
        if not np.allclose(
                taxes_subsample[cyr], taxes_fullsample[cyr], atol=0.0,
                rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Example #22
0
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string(float_format='%8.1f') + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True))
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.014
        else:
            reltol = 0.006
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Example #23
0
def test_Calculator_diagnostic_table():
    policy = Policy()
    puf = Records(data=TAXDATA, weights=WEIGHTS, start_year=Records.PUF_YEAR)
    calc = Calculator(policy=policy, records=puf)
    calc.diagnostic_table()
Example #24
0
def test_diagnostic_table():
    policy = Policy()
    TAX_DTA.flpdyr += 18  # flpdyr==2009 so that Records ctor will apply blowup
    puf = Records(data=TAX_DTA, weights=WEIGHTS)
    calc = Calculator(policy=policy, records=puf)
    calc.diagnostic_table()
Example #25
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual, expect, small)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                                    reltol,
                                                                    rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            cyr = it_sub.index + calc_start_year
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
Example #26
0
def test_Calculator_diagnostic_table():
    policy = Policy()
    puf = Records(data=TAX_DTA, weights=WEIGHTS, start_year=Records.PUF_YEAR)
    calc = Calculator(policy=policy, records=puf)
    calc.diagnostic_table()