Beispiel #1
0
def differences(new_filename, old_filename, stat_kind, small=0.0):
    """
    Return message string if there are differences at least as large as small;
    otherwise (i.e., if there are only small differences) return empty string.
    """
    with open(new_filename, 'r') as vfile:
        new_text = vfile.read()
    with open(old_filename, 'r') as vfile:
        old_text = vfile.read()
    if nonsmall_diffs(new_text.splitlines(True), old_text.splitlines(True),
                      small):
        new_name = os.path.basename(new_filename)
        old_name = os.path.basename(old_filename)
        msg = '{} RESULTS DIFFER:\n'.format(stat_kind)
        msg += '-------------------------------------------------'
        msg += '-------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'.format(new_name)
        msg += '--- if new OK, copy {} to  ---\n'.format(new_name)
        msg += '---                 {}         ---\n'.format(old_name)
        msg += '---            and rerun test.                   '
        msg += '          ---\n'
        msg += '-------------------------------------------------'
        msg += '-------------\n'
    else:
        msg = ''
        os.remove(new_filename)
    return msg
def differences(new_filename, old_filename, stat_kind, small=0.0):
    """
    Return message string if there are differences at least as large as small;
    otherwise (i.e., if there are only small differences) return empty string.
    """
    with open(new_filename, 'r') as vfile:
        new_text = vfile.read()
    with open(old_filename, 'r') as vfile:
        old_text = vfile.read()
    if nonsmall_diffs(new_text.splitlines(True),
                      old_text.splitlines(True), small):
        new_name = os.path.basename(new_filename)
        old_name = os.path.basename(old_filename)
        msg = '{} RESULTS DIFFER:\n'.format(stat_kind)
        msg += '-------------------------------------------------'
        msg += '-------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'.format(new_name)
        msg += '--- if new OK, copy {} to  ---\n'.format(new_name)
        msg += '---                 {}         ---\n'.format(old_name)
        msg += '---            and rerun test.                   '
        msg += '          ---\n'
        msg += '-------------------------------------------------'
        msg += '-------------\n'
    else:
        msg = ''
        os.remove(new_filename)
    return msg
def test_run_tax_calc_model(tests_path):
    """
    Test tbi.run_nth_year_tax_calc_model function using CPS data.
    """
    user_modifications = {
        'policy': {
            2016: {
                '_II_rt3': [0.33],
                '_PT_rt3': [0.33],
                '_II_rt4': [0.33],
                '_PT_rt4': [0.33]
            }
        },
        'consumption': {
            2016: {
                '_MPC_e20400': [0.01]
            }
        },
        'behavior': {
            2016: {
                '_BE_sub': [0.25]
            }
        },
        'growdiff_baseline': {},
        'growdiff_response': {}
    }
    res = run_nth_year_tax_calc_model(year_n=2,
                                      start_year=2018,
                                      use_puf_not_cps=False,
                                      use_full_sample=False,
                                      user_mods=user_modifications,
                                      return_dict=True)
    assert isinstance(res, dict)
    # put actual results in a multiline string
    actual_results = ''
    for tbl in sorted(res.keys()):
        actual_results += 'TABLE {} RESULTS:\n'.format(tbl)
        actual_results += json.dumps(
            res[tbl], sort_keys=True, indent=4, separators=(',', ': ')) + '\n'
    # read expected results from file
    expect_fname = 'tbi_cps_expect.txt'
    expect_path = os.path.join(tests_path, expect_fname)
    with open(expect_path, 'r') as expect_file:
        expect_results = expect_file.read()
    # ensure actual and expect results have no differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expect_results.splitlines(True))
    if diffs:
        actual_fname = '{}{}'.format(expect_fname[:-10], 'actual.txt')
        actual_path = os.path.join(tests_path, actual_fname)
        with open(actual_path, 'w') as actual_file:
            actual_file.write(actual_results)
        msg = 'TBI RESULTS DIFFER\n'
        msg += '----------------------------------------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'
        msg += '--- if new OK, copy {} to  ---\n'
        msg += '---                 {}     ---\n'
        msg += '---            and rerun test.             ---\n'
        msg += '----------------------------------------------\n'
        raise ValueError(msg.format(actual_fname, actual_fname, expect_fname))
Beispiel #4
0
 def res_and_out_are_same(base):
     """
     Return True if base.res and base.out file contents are the same;
     return False if base.res and base.out file contents differ.
     """
     with open(base + '.res') as resfile:
         act_res = resfile.read()
     with open(base + '.out') as outfile:
         exp_res = outfile.read()
     # check to see if act_res & exp_res have differences
     return not nonsmall_diffs(act_res.splitlines(True),
                               exp_res.splitlines(True))
 def res_and_out_are_same(base):
     """
     Return True if base.res and base.out file contents are the same;
     return False if base.res and base.out file contents differ.
     """
     with open(base + '.res') as resfile:
         act_res = resfile.read()
     with open(base + '.out') as outfile:
         exp_res = outfile.read()
     # check to see if act_res & exp_res have differences
     return not nonsmall_diffs(act_res.splitlines(True),
                               exp_res.splitlines(True))
def test_run_taxcalc_model(tests_path):
    """
    Test tbi.run_nth_year_taxcalc_model function using PUF data.
    """
    user_modifications = {
        'policy': {
            2016: {'_II_rt3': [0.33],
                   '_PT_rt3': [0.33],
                   '_II_rt4': [0.33],
                   '_PT_rt4': [0.33]}
        },
        'consumption': {
            2016: {'_MPC_e20400': [0.01]}
        },
        'growdiff_baseline': {
        },
        'growdiff_response': {
        }
    }
    res = run_nth_year_taxcalc_model(year_n=2, start_year=2018,
                                     use_puf_not_cps=True,
                                     use_full_sample=False,
                                     user_mods=user_modifications,
                                     return_dict=True)
    assert isinstance(res, dict)
    # put actual results in a multiline string
    actual_results = ''
    for tbl in sorted(res.keys()):
        actual_results += 'TABLE {} RESULTS:\n'.format(tbl)
        actual_results += json.dumps(res[tbl], sort_keys=True,
                                     indent=4, separators=(',', ': ')) + '\n'
    # read expected results from file
    expect_fname = 'tbi_puf_expect.txt'
    expect_path = os.path.join(tests_path, expect_fname)
    with open(expect_path, 'r') as expect_file:
        expect_results = expect_file.read()
    # ensure actual and expect results have no differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expect_results.splitlines(True))
    if diffs:
        actual_fname = '{}{}'.format(expect_fname[:-10], 'actual.txt')
        actual_path = os.path.join(tests_path, actual_fname)
        with open(actual_path, 'w') as actual_file:
            actual_file.write(actual_results)
        msg = 'TBI RESULTS DIFFER\n'
        msg += '----------------------------------------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'
        msg += '--- if new OK, copy {} to  ---\n'
        msg += '---                 {}     ---\n'
        msg += '---            and rerun test.             ---\n'
        msg += '----------------------------------------------\n'
        raise ValueError(msg.format(actual_fname, actual_fname, expect_fname))
Beispiel #7
0
 def res_and_out_are_same(base):
     """
     Return true if base.res and base.out file contents are the same;
     return false if base.res and base.out file contents differ.
     """
     with open(base + '.out') as outfile:
         exp_res = outfile.read()
     exp = exp_res.splitlines(True)
     with open(base + '.res') as resfile:
         act_res = resfile.read()
     act = act_res.splitlines(True)
     # check that act & exp have differences no more than small value
     diffs = nonsmall_diffs(act, exp, small=1e-6)
     return not diffs
Beispiel #8
0
def test_run_tax_calc_model(using_puf, tests_path):
    res = run_nth_year_tax_calc_model(year_n=2,
                                      start_year=2018,
                                      use_puf_not_cps=using_puf,
                                      use_full_sample=False,
                                      user_mods=USER_MODS,
                                      return_dict=True)
    assert isinstance(res, dict)
    # put actual results in a multiline string
    actual_results = ''
    for tbl in sorted(res.keys()):
        actual_results += 'TABLE {} RESULTS:\n'.format(tbl)
        actual_results += json.dumps(
            res[tbl], sort_keys=True, indent=4, separators=(',', ': ')) + '\n'
    # read expected results from file
    if using_puf:
        expect_fname = 'tbi_puf_expect.txt'
    else:
        expect_fname = 'tbi_cps_expect.txt'
    expect_path = os.path.join(tests_path, expect_fname)
    with open(expect_path, 'r') as expect_file:
        expect_results = expect_file.read()
    # ensure actual and expect results have no differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expect_results.splitlines(True))
    if diffs:
        actual_fname = '{}{}'.format(expect_fname[:-10], 'actual.txt')
        actual_path = os.path.join(tests_path, actual_fname)
        with open(actual_path, 'w') as actual_file:
            actual_file.write(actual_results)
        msg = 'TBI RESULTS DIFFER\n'
        msg += '----------------------------------------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'
        msg += '--- if new OK, copy {} to  ---\n'
        msg += '---                 {}     ---\n'
        msg += '---            and rerun test.             ---\n'
        msg += '----------------------------------------------\n'
        raise ValueError(msg.format(actual_fname, actual_fname, expect_fname))
Beispiel #9
0
def differences(afilename, efilename):
    """
    Check for differences between results in afilename and efilename files.
    """
    with open(afilename, 'r') as afile:
        actres = afile.read()
    with open(efilename, 'r') as efile:
        expres = efile.read()
    diffs = nonsmall_diffs(actres.splitlines(True), expres.splitlines(True),
                           0.0)
    if diffs:
        afname = os.path.basename(afilename)
        efname = os.path.basename(efilename)
        msg = 'COMPARE RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'
        msg += '--- if new OK, copy {} to  ---\n'
        msg += '---                 {}     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg.format(afname, afname, efname))
    else:
        os.remove(afilename)
def differences(afilename, efilename):
    """
    Check for differences between results in afilename and efilename files.
    """
    with open(afilename, 'r') as afile:
        actres = afile.read()
    with open(efilename, 'r') as efile:
        expres = efile.read()
    diffs = nonsmall_diffs(actres.splitlines(True),
                           expres.splitlines(True), 0.0)
    if diffs:
        afname = os.path.basename(afilename)
        efname = os.path.basename(efilename)
        msg = 'COMPARE RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN {} FILE ---\n'
        msg += '--- if new OK, copy {} to  ---\n'
        msg += '---                 {}     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg.format(afname, afname, efname))
    else:
        os.remove(afilename)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing current law policy parameters
    baseline_policy = Policy()
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True))
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.014
        else:
            reltol = 0.006
        if not np.allclose(
                taxes_subsample[cyr], taxes_fullsample[cyr], atol=0.0,
                rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    rec = Records.cps_constructor()
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True), small)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    cps_filepath = os.path.join(tests_path, '..', 'cps.csv.gz')
    fullsample = pd.read_csv(cps_filepath)
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample,
                            gfactors=Growfactors(),
                            weights=Records.CPS_WEIGHTS_FILENAME,
                            adjust_ratios=Records.CPS_RATIOS_FILENAME,
                            start_year=Records.CPSCSV_YEAR)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    # TODO: skip first year because of BUG in cps_weights.csv file
    taxes_subsample = taxes_subsample[1:]  # TODO: eliminate code
    taxes_fullsample = taxes_fullsample[1:]  # TODO: eliminate code
    if not np.allclose(
            taxes_subsample, taxes_fullsample, atol=0.0, rtol=reltol):
        msg = 'CPSCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            subfrac, reltol, rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            cyr = it_sub.index + calc_start_year
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    recs = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string(float_format='%8.1f') + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    diffs = nonsmall_diffs(actual, expect)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 2222  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        reltol = 0.01  # maximum allowed relative difference in tax liability
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Beispiel #14
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    recs = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string(float_format='%8.1f') + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    diffs = nonsmall_diffs(actual, expect)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 2222  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        reltol = 0.01  # maximum allowed relative difference in tax liability
        if not np.allclose(
                taxes_subsample[cyr], taxes_fullsample[cyr], atol=0.0,
                rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr], taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
def test_agg(tests_path, cps_fullsample):
    """
    Test current-law aggregate taxes using cps.csv file.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    recs = Records.cps_constructor(data=cps_fullsample)
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=recs)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string(float_format='%8.1f') + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True))
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n'
        msg += '---                 cpscsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    recs_subsample = Records.cps_constructor(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=recs_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    msg = ''
    for cyr in range(calc_start_year, calc_start_year + nyrs):
        if cyr == calc_start_year:
            reltol = 0.014
        else:
            reltol = 0.006
        if not np.allclose(taxes_subsample[cyr], taxes_fullsample[cyr],
                           atol=0.0, rtol=reltol):
            reldiff = (taxes_subsample[cyr] / taxes_fullsample[cyr]) - 1.
            line1 = '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'
            line2 = '\n  when subfrac={:.3f}, rtol={:.4f}, seed={}'
            line3 = '\n  with sub={:.3f}, full={:.3f}, rdiff={:.4f}'
            msg += line1.format(cyr)
            msg += line2.format(subfrac, reltol, rn_seed)
            msg += line3.format(taxes_subsample[cyr],
                                taxes_fullsample[cyr],
                                reldiff)
    if msg:
        raise ValueError(msg)
Beispiel #16
0
def test_mtr(tests_path, puf_path):
    """
    Test Tax-Calculator marginal tax rates with no policy reform using puf.csv

    Compute histograms for each marginal tax rate income type using
    sample input from the puf.csv file and writing output to a string,
    which is then compared for differences with EXPECTED_MTR_RESULTS.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # construct actual results string, res
    res = ''
    if MTR_NEG_DIFF:
        res += 'MTR computed using NEGATIVE finite_diff '
    else:
        res += 'MTR computed using POSITIVE finite_diff '
    res += 'for tax year {}\n'.format(MTR_TAX_YEAR)
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    clp.set_year(MTR_TAX_YEAR)
    # create a Records object (puf) containing puf.csv input records
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    res += '{} = {}\n'.format('Total number of data records', puf.array_length)
    res += 'PTAX mtr histogram bin edges:\n'
    res += '     {}\n'.format(PTAX_MTR_BIN_EDGES)
    res += 'ITAX mtr histogram bin edges:\n'
    res += '     {}\n'.format(ITAX_MTR_BIN_EDGES)
    variable_header = 'PTAX and ITAX mtr histogram bin counts for'
    # compute marginal tax rate (mtr) histograms for each mtr variable
    for var_str in Calculator.MTR_VALID_VARIABLES:
        zero_out = (var_str == 'e01400')
        (mtr_ptax, mtr_itax, _) = calc.mtr(variable_str=var_str,
                                           negative_finite_diff=MTR_NEG_DIFF,
                                           zero_out_calculated_vars=zero_out,
                                           wrt_full_compensation=False)
        if zero_out:
            # check that calculated variables are consistent
            assert np.allclose((calc.array('iitax') +
                                calc.array('payrolltax')),
                               calc.array('combined'))
            assert np.allclose((calc.array('ptax_was') +
                                calc.array('setax') +
                                calc.array('ptax_amc')),
                               calc.array('payrolltax'))
            assert np.allclose(calc.array('c21060') - calc.array('c21040'),
                               calc.array('c04470'))
            assert np.allclose(calc.array('taxbc') + calc.array('c09600'),
                               calc.array('c05800'))
            assert np.allclose((calc.array('c05800') +
                                calc.array('othertaxes') -
                                calc.array('c07100')),
                               calc.array('c09200'))
            assert np.allclose(calc.array('c09200') - calc.array('refund'),
                               calc.array('iitax'))
        if var_str == 'e00200s':
            # only MARS==2 filing units have valid MTR values
            mtr_ptax = mtr_ptax[calc.array('MARS') == 2]
            mtr_itax = mtr_itax[calc.array('MARS') == 2]
        res += '{} {}:\n'.format(variable_header, var_str)
        res += mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, recid)
        res += mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, recid)
    # check for differences between actual and expected results
    mtrres_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(mtrres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    if nonsmall_diffs(res.splitlines(True), expected_results.splitlines(True)):
        new_filename = '{}{}'.format(mtrres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(res)
        msg = 'PUFCSV MTR RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_mtr_actual.txt to  ---\n'
        msg += '---                 pufcsv_mtr_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
Beispiel #17
0
def test_agg(tests_path, puf_fullsample):
    """
    Test Tax-Calculator aggregate taxes with no policy reform using
    the full-sample puf.csv and a small sub-sample of puf.csv
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_fullsample)
    # create a Calculator object using baseline policy and puf records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt results to a string with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    # create actual and expected lists of diagnostic table lines
    actual = adtstr.splitlines(True)
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    expect = expected_results.splitlines(True)
    # ensure actual and expect lines have differences no more than small value
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual, expect, small)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        msg = 'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
        msg += '---                 pufcsv_agg_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    fullsample = puf_fullsample
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.05  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(subfrac,
                                                                    reltol,
                                                                    rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            cyr = it_sub.index + calc_start_year
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
def test_mtr(tests_path, puf_path):
    """
    Test Tax-Calculator marginal tax rates with no policy reform using puf.csv

    Compute histograms for each marginal tax rate income type using
    sample input from the puf.csv file and writing output to a string,
    which is then compared for differences with EXPECTED_MTR_RESULTS.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # construct actual results string, res
    res = ''
    if MTR_NEG_DIFF:
        res += 'MTR computed using NEGATIVE finite_diff '
    else:
        res += 'MTR computed using POSITIVE finite_diff '
    res += 'for tax year {}\n'.format(MTR_TAX_YEAR)
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    clp.set_year(MTR_TAX_YEAR)
    # create a Records object (puf) containing puf.csv input records
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    res += '{} = {}\n'.format('Total number of data records', puf.array_length)
    res += 'PTAX mtr histogram bin edges:\n'
    res += '     {}\n'.format(PTAX_MTR_BIN_EDGES)
    res += 'ITAX mtr histogram bin edges:\n'
    res += '     {}\n'.format(ITAX_MTR_BIN_EDGES)
    variable_header = 'PTAX and ITAX mtr histogram bin counts for'
    # compute marginal tax rate (mtr) histograms for each mtr variable
    for var_str in Calculator.MTR_VALID_VARIABLES:
        zero_out = (var_str == 'e01400')
        (mtr_ptax, mtr_itax, _) = calc.mtr(variable_str=var_str,
                                           negative_finite_diff=MTR_NEG_DIFF,
                                           zero_out_calculated_vars=zero_out,
                                           wrt_full_compensation=False)
        if zero_out:
            # check that calculated variables are consistent
            assert np.allclose((calc.array('iitax') +
                                calc.array('payrolltax')),
                               calc.array('combined'))
            assert np.allclose((calc.array('ptax_was') +
                                calc.array('setax') +
                                calc.array('ptax_amc')),
                               calc.array('payrolltax'))
            assert np.allclose(calc.array('c21060') - calc.array('c21040'),
                               calc.array('c04470'))
            assert np.allclose(calc.array('taxbc') + calc.array('c09600'),
                               calc.array('c05800'))
            assert np.allclose((calc.array('c05800') +
                                calc.array('othertaxes') -
                                calc.array('c07100')),
                               calc.array('c09200'))
            assert np.allclose(calc.array('c09200') - calc.array('refund'),
                               calc.array('iitax'))
        if var_str == 'e00200s':
            # only MARS==2 filing units have valid MTR values
            mtr_ptax = mtr_ptax[calc.array('MARS') == 2]
            mtr_itax = mtr_itax[calc.array('MARS') == 2]
        res += '{} {}:\n'.format(variable_header, var_str)
        res += mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, recid)
        res += mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, recid)
    # check for differences between actual and expected results
    mtrres_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(mtrres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    if nonsmall_diffs(res.splitlines(True), expected_results.splitlines(True)):
        new_filename = '{}{}'.format(mtrres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(res)
        msg = 'PUFCSV MTR RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_mtr_actual.txt to  ---\n'
        msg += '---                 pufcsv_mtr_expect.txt     ---\n'
        msg += '---            and rerun test.                ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)