Esempio n. 1
0
def test_diff_table_sum_row(puf_1991, weights_1991):
    """
    Check that the last row of a difference table is the same whether the
    rows are grouped by small income bins or by large income bins.
    """
    # baseline Calculator with a current-law Policy object
    baseline = Calculator(
        policy=Policy(),
        records=Records(data=puf_1991, weights=weights_1991, start_year=2009)
    )
    baseline.calc_all()
    # reform Calculator with _II_rt4 changed starting in 2013
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    reformed = Calculator(
        policy=reform_policy,
        records=Records(data=puf_1991, weights=weights_1991, start_year=2009)
    )
    reformed.calc_all()
    # build one difference table per grouping, small bins first
    by_small, by_large = [
        create_difference_table(baseline.records,
                                reformed.records,
                                groupby=bins)
        for bins in ('small_income_bins', 'large_income_bins')
    ]
    # columns compared exactly; all remaining columns are compared
    # with floating-point tolerance
    exact_cols = [
        'mean', 'perc_inc', 'perc_cut', 'share_of_change', 'aftertax_perc'
    ]
    approx_cols = [
        name for name in by_small.columns.tolist() if name not in exact_cols
    ]
    assert np.allclose(by_small[approx_cols][-1:], by_large[approx_cols][-1:])
    assert_array_equal(by_small[exact_cols][-1:], by_large[exact_cols][-1:])
Esempio n. 2
0
def test_diff_table_sum_row(cps_subsample):
    """
    Compare the 'ALL' row of iitax difference tables built with different
    row groupings, and check that the population-quantile table reports a
    larger 'ALL' count than the standard-income-bins table.
    """
    records = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator is built before the reform is implemented
    baseline = Calculator(policy=policy, records=records)
    baseline.calc_all()
    # reform Calculator is built after II_rt4 is changed for 2013
    policy.implement_reform({'II_rt4': {2013: 0.56}})
    reformed = Calculator(policy=policy, records=records)
    reformed.calc_all()
    base_vars = baseline.dataframe(DIFF_VARIABLES)
    reform_vars = reformed.dataframe(DIFF_VARIABLES)

    def iitax_table(groupby, **options):
        """Return the iitax difference table for the given row grouping."""
        return create_difference_table(base_vars, reform_vars,
                                       groupby, 'iitax', **options)

    by_std_bins = iitax_table('standard_income_bins')
    by_soi_bins = iitax_table('soi_agi_bins')
    by_unit_deciles = iitax_table('weighted_deciles', pop_quantiles=False)
    by_pop_deciles = iitax_table('weighted_deciles', pop_quantiles=True)
    assert np.allclose(by_std_bins.loc['ALL'], by_soi_bins.loc['ALL'])
    assert np.allclose(by_std_bins.loc['ALL'], by_unit_deciles.loc['ALL'])
    # make sure population count is larger than filing-unit count
    pop_count = by_pop_deciles.at['ALL', 'count']
    unit_count = by_std_bins.at['ALL', 'count']
    assert pop_count > unit_count
Esempio n. 3
0
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the last row of an iitax difference table is the same whether
    the rows are grouped by small income bins or by large income bins.
    """
    records = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator is built before the reform is implemented
    baseline = Calculator(policy=policy, records=records)
    baseline.calc_all()
    # reform Calculator is built after _II_rt4 is changed starting in 2013
    policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    reformed = Calculator(policy=policy, records=records)
    reformed.calc_all()
    # build one difference table per grouping, small bins first
    by_small, by_large = [
        create_difference_table(baseline.records,
                                reformed.records,
                                groupby=bins,
                                income_measure='expanded_income',
                                tax_to_diff='iitax')
        for bins in ('small_income_bins', 'large_income_bins')
    ]
    # columns compared exactly; all remaining columns are compared
    # with floating-point tolerance
    exact_cols = [
        'mean', 'perc_inc', 'perc_cut', 'share_of_change', 'perc_aftertax',
        'pc_aftertaxinc'
    ]
    approx_cols = [name for name in list(by_small) if name not in exact_cols]
    assert np.allclose(by_small[approx_cols][-1:], by_large[approx_cols][-1:])
    np.testing.assert_array_equal(by_small[exact_cols][-1:],
                                  by_large[exact_cols][-1:])
Esempio n. 4
0
 def differences_table(self,
                       year: int,
                       groupby: str,
                       tax_to_diff: str,
                       pop_quantiles: bool = False) -> pd.DataFrame:
     """
     Build a differences table from the base and reform data of one year.

     Parameters
     ----------
     year: which year the difference table should be from; used as the
         key into self.base_data and self.reform_data
     groupby: determines how the rows in the table are sorted
         options: 'weighted_deciles', 'standard_income_bins', 'soi_agi_bins'
     tax_to_diff: which tax to take the difference of
         options: 'iitax', 'payrolltax', 'combined'
     pop_quantiles: whether weighted_deciles contain an equal number of tax
         units (False) or people (True)

     Returns
     -------
     DataFrame containing a differences table
     """
     return create_difference_table(self.base_data[year],
                                    self.reform_data[year],
                                    groupby, tax_to_diff, pop_quantiles)
Esempio n. 5
0
def summary_diff_xdec(res, df1, df2):
    """
    Add weighted-decile difference tables to the summary results.

    res is dictionary of summary-results DataFrames.
    df1 contains results variables for baseline policy.
    df2 contains results variables for reform policy.
    returns the same res dictionary, augmented with one difference table
    per tax under the keys 'diff_itax_xdec', 'diff_ptax_xdec' and
    'diff_comb_xdec'.
    """
    # (result key, tax_to_diff) pairs, in the order the tables are created
    table_specs = (
        ('diff_itax_xdec', 'iitax'),
        ('diff_ptax_xdec', 'payrolltax'),
        ('diff_comb_xdec', 'combined'),
    )
    for result_key, tax in table_specs:
        res[result_key] = create_difference_table(df1, df2,
                                                  'weighted_deciles', tax)
    return res
    def difference_table(self, calc, groupby, tax_to_diff):
        """
        Return a tax-difference table comparing self (baseline) with calc
        (reform) as a Pandas DataFrame.

        Results from self and calc are sorted by expanded_income into the
        table rows defined by groupby, and grouped statistics are computed.
        This method leaves the Calculator objects unchanged.
        Note that the returned tables have consistent income groups (based
        on the self expanded_income) even though the baseline expanded_income
        in self and the reform expanded_income in calc are different.

        Parameters
        ----------
        calc : Calculator object
            calc represents the reform while self represents the baseline;
            it must have the same current_year and array_len as self

        groupby : String object
            options for input: 'weighted_deciles', 'standard_income_bins'
            determines how the rows in the resulting Pandas DataFrame
            are defined

        tax_to_diff : String object
            options for input: 'iitax', 'payrolltax', 'combined'
            specifies which tax to difference

        Returns and typical usage
        -------------------------
        diff = calc1.difference_table(calc2, 'weighted_deciles', 'iitax')
        (where calc1 is a baseline Calculator object
        and calc2 is a reform Calculator object).
        The returned diff is a difference table as a Pandas DataFrame
        whose rows are defined by groupby.
        NOTE: when groupby is 'weighted_deciles', the returned table has three
              extra rows containing top-decile detail consisting of statistics
              for the 0.90-0.95 quantile range (bottom half of top decile),
              for the 0.95-0.99 quantile range, and
              for the 0.99-1.00 quantile range (top one percent); and the
              returned table splits the bottom decile into filing units with
              negative (denoted by a 0-10n row label),
              zero (denoted by a 0-10z row label), and
              positive (denoted by a 0-10p row label) values of the
              income measure.
        """
        # the two Calculator objects must be comparable
        assert isinstance(calc, Calculator)
        assert calc.current_year == self.current_year
        assert calc.array_len == self.array_len
        # baseline variables are extracted first, then reform variables
        return create_difference_table(self.dataframe(DIFF_VARIABLES),
                                       calc.dataframe(DIFF_VARIABLES),
                                       groupby, tax_to_diff)
Esempio n. 7
0
def xtest_diff_table_sum_row(pit_subsample):
    """
    Check that the last row of an iitax difference table is the same whether
    the rows are grouped by standard income bins or by SOI AGI bins.

    NOTE(review): the xtest_ prefix presumably keeps pytest from collecting
    this function -- confirm that the test is meant to stay disabled.
    """
    rec = Records(data=pit_subsample)
    # create a current-law Policy object and Calculator calc1
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator calc2
    reform = {2017: {'_rate2': [0.06]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()
    # create two difference tables and compare their content
    tdiff1 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                     calc2.dataframe(DIFF_VARIABLES),
                                     'standard_income_bins', 'iitax')
    tdiff2 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                     calc2.dataframe(DIFF_VARIABLES),
                                     'soi_agi_bins', 'iitax')
    non_digit_cols = ['perc_inc', 'perc_cut']
    digit_cols = [c for c in list(tdiff1) if c not in non_digit_cols]
    assert np.allclose(tdiff1[digit_cols][-1:], tdiff2[digit_cols][-1:])
    # assert the result: a bare np.allclose(...) call only returns a bool
    # and would let a mismatch in these columns go unnoticed
    assert np.allclose(tdiff1[non_digit_cols][-1:],
                       tdiff2[non_digit_cols][-1:])
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the last row of an iitax difference table is the same whether
    the rows are grouped by standard income bins or by SOI AGI bins.
    """
    rec = Records.cps_constructor(data=cps_subsample)
    # create a current-law Policy object and Calculator calc1
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator calc2
    reform = {'II_rt4': {2013: 0.56}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()
    # create two difference tables and compare their content
    tdiff1 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                     calc2.dataframe(DIFF_VARIABLES),
                                     'standard_income_bins', 'iitax')
    tdiff2 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                     calc2.dataframe(DIFF_VARIABLES),
                                     'soi_agi_bins', 'iitax')
    non_digit_cols = ['perc_inc', 'perc_cut']
    digit_cols = [c for c in list(tdiff1) if c not in non_digit_cols]
    assert np.allclose(tdiff1[digit_cols][-1:],
                       tdiff2[digit_cols][-1:])
    # assert the result: a bare np.allclose(...) call only returns a bool
    # and would let a mismatch in these columns go unnoticed
    assert np.allclose(tdiff1[non_digit_cols][-1:],
                       tdiff2[non_digit_cols][-1:])
def test_diff_table_sum_row(cps_subsample):
    """
    Compare the 'ALL' row of iitax difference tables built with different
    row groupings, and check that the population-quantile table reports a
    larger 'ALL' count than the standard-income-bins table.
    """
    records = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator is built before the reform is implemented
    baseline = Calculator(policy=policy, records=records)
    baseline.calc_all()
    # reform Calculator is built after II_rt4 is changed for 2013
    policy.implement_reform({'II_rt4': {2013: 0.56}})
    reformed = Calculator(policy=policy, records=records)
    reformed.calc_all()
    # extract the variables once and reuse them for every table
    base_vars = baseline.dataframe(DIFF_VARIABLES)
    reform_vars = reformed.dataframe(DIFF_VARIABLES)
    by_std_bins = create_difference_table(
        base_vars, reform_vars, 'standard_income_bins', 'iitax'
    )
    by_soi_bins = create_difference_table(
        base_vars, reform_vars, 'soi_agi_bins', 'iitax'
    )
    by_unit_deciles = create_difference_table(
        base_vars, reform_vars, 'weighted_deciles', 'iitax',
        pop_quantiles=False
    )
    by_pop_deciles = create_difference_table(
        base_vars, reform_vars, 'weighted_deciles', 'iitax',
        pop_quantiles=True
    )
    assert np.allclose(by_std_bins.loc['ALL'], by_soi_bins.loc['ALL'])
    assert np.allclose(by_std_bins.loc['ALL'], by_unit_deciles.loc['ALL'])
    # make sure population count is larger than filing-unit count
    pop_count = by_pop_deciles.at['ALL', 'count']
    unit_count = by_std_bins.at['ALL', 'count']
    assert pop_count > unit_count
Esempio n. 10
0
def test_create_tables(puf_1991, weights_1991):
    """
    Exercise create_difference_table and create_distribution_table with
    valid and invalid arguments, and with Calculator objects whose years
    differ.
    """
    # baseline Calculator with a current-law Policy object
    calc1 = Calculator(
        policy=Policy(),
        records=Records(data=puf_1991, weights=weights_1991, start_year=2009)
    )
    calc1.calc_all()
    # reform Calculator with _II_rt4 changed starting in 2013
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    calc2 = Calculator(
        policy=reform_policy,
        records=Records(data=puf_1991, weights=weights_1991, start_year=2009)
    )
    calc2.calc_all()

    # valid groupby values produce a DataFrame
    for bins in ('large_income_bins', 'webapp_income_bins'):
        table = create_difference_table(calc1.records,
                                        calc2.records,
                                        groupby=bins)
        assert isinstance(table, pd.DataFrame)
    # an unknown groupby value is rejected
    with pytest.raises(ValueError):
        create_difference_table(calc1.records,
                                calc2.records,
                                groupby='bad_bins')
    # distribution tables reject a bad result_type and a bad groupby
    bad_dist_args = (
        {'groupby': 'small_income_bins', 'result_type': 'bad_result_type'},
        {'groupby': 'bad_bins', 'result_type': 'weighted_sum'},
    )
    for kwargs in bad_dist_args:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records, **kwargs)
    # distribution table of differences relative to the baseline records
    diffs_table = create_distribution_table(calc2.records,
                                            groupby='small_income_bins',
                                            result_type='weighted_sum',
                                            baseline_obj=calc1.records,
                                            diffs=True)
    assert isinstance(diffs_table, pd.DataFrame)

    # after advancing only calc1 by a year, both functions raise ValueError
    calc1.increment_year()
    with pytest.raises(ValueError):
        create_difference_table(calc1.records,
                                calc2.records,
                                groupby='large_income_bins')
    with pytest.raises(ValueError):
        create_distribution_table(calc2.records,
                                  groupby='small_income_bins',
                                  result_type='weighted_sum',
                                  baseline_obj=calc1.records,
                                  diffs=True)
Esempio n. 11
0
 def differences_table(self, year, groupby, tax_to_diff):
     """
     Build a differences table from the base and reform data of one year.

     Parameters
     ----------
     year: which year the difference table should be from; used as the
         key into self.base_data and self.reform_data
     groupby: determines how the rows in the table are sorted
         options: 'weighted_deciles', 'standard_income_bins', 'soi_agi_bins'
     tax_to_diff: which tax to take the difference of
         options: 'iitax', 'payrolltax', 'combined'

     Returns
     -------
     DataFrame containing a differences table
     """
     return create_difference_table(self.base_data[year],
                                    self.reform_data[year],
                                    groupby, tax_to_diff)
Esempio n. 12
0
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables against
    hard-coded expected values, and check that invalid groupby and
    result_type arguments raise ValueError.
    """
    # baseline Calculator with a current-law Policy object
    calc1 = Calculator(policy=Policy(),
                       records=Records.cps_constructor(data=cps_subsample))
    calc1.calc_all()
    # reform Calculator with _II_rt1 changed starting in 2013
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=reform_policy,
                       records=Records.cps_constructor(data=cps_subsample))
    calc2.calc_all()

    # difference tables: (groupby, tax_to_diff, expected perc_aftertax)
    diff_cases = [
        ('large_income_bins', 'combined', [
            '0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%', '1.15%',
            '1.04%', '0.78%', '0.27%', 'n/a'
        ]),
        ('webapp_income_bins', 'iitax', [
            '0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%', '1.15%',
            '1.04%', '0.78%', '0.30%', '0.08%', '0.07%', 'n/a'
        ]),
        ('small_income_bins', 'iitax', [
            '0.00%', '0.01%', '0.02%', '0.16%', '0.64%', '0.82%', '0.87%',
            '0.92%', '1.10%', '1.15%', '1.04%', '0.78%', '0.30%', '0.08%',
            '0.09%', '0.07%', '0.05%', '0.02%', '0.00%', 'n/a'
        ]),
        ('weighted_deciles', 'combined', [
            '0.00%', '0.02%', '0.35%', '0.79%', '0.89%', '0.97%', '1.11%',
            '1.18%', '0.91%', '0.50%', 'n/a'
        ]),
    ]
    for bins, tax, want in diff_cases:
        table = create_difference_table(calc1.records,
                                        calc2.records,
                                        groupby=bins,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax)
        assert isinstance(table, pd.DataFrame)
        assert np.array_equal(table['perc_aftertax'], want)

    with pytest.raises(ValueError):
        create_difference_table(calc1.records,
                                calc2.records,
                                groupby='bad_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    # distribution tables:
    # (groupby, expected iitax, expected num_returns_ItemDed)
    dist_cases = [
        ('weighted_deciles',
         [
             -8851215, -99666120, -123316561, -85895787, -47357458,
             207462144, 443391189, 978487989, 1709504845, 7631268907,
             10605027933
         ],
         [
             1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788, 131260,
             146001, 583832
         ]),
        ('webapp_income_bins',
         [
             -103274, -83144506, -152523834, -129881470, 85802556,
             255480678, 832529135, 1066963515, 3023956558, 2876331264,
             1008672459, 1820944852, 10605027933
         ],
         [
             0, 1202, 22654, 31665, 30547, 49851, 124786, 97349, 160147,
             56806, 5803, 3023, 583832
         ]),
    ]
    for bins, want_iitax, want_itemded in dist_cases:
        table = create_distribution_table(calc2.records,
                                          groupby=bins,
                                          income_measure='expanded_income',
                                          result_type='weighted_sum')
        assert isinstance(table, pd.DataFrame)
        assert np.allclose(table['iitax'], want_iitax, atol=0.5, rtol=0.0)
        assert np.allclose(table['num_returns_ItemDed'].tolist(),
                           want_itemded,
                           atol=0.5,
                           rtol=0.0)

    # a copy of expanded_income stored under another attribute name can
    # also serve as the income measure
    calc2.records.expanded_income_baseline = calc2.records.expanded_income
    table = create_distribution_table(
        calc2.records,
        groupby='webapp_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum'
    )
    assert isinstance(table, pd.DataFrame)

    # invalid result_type and invalid groupby are both rejected
    bad_dist_args = (
        {'groupby': 'small_income_bins', 'result_type': 'bad_result_type'},
        {'groupby': 'bad_bins', 'result_type': 'weighted_sum'},
    )
    for bad in bad_dist_args:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records,
                                      groupby=bad['groupby'],
                                      income_measure='expanded_income',
                                      result_type=bad['result_type'])
Esempio n. 13
0
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables against
    hard-coded expected values, and check that invalid groupby and
    result_type arguments raise ValueError.
    """
    # pylint: disable=too-many-statements
    records = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    # baseline Calculator is built before the reform is implemented
    calc1 = Calculator(policy=policy, records=records)
    calc1.calc_all()
    # reform Calculator is built after _II_rt1 is changed starting in 2013
    policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=policy, records=records)
    calc2.calc_all()

    # test creating various difference tables

    # (groupby, tax_to_diff, expected perc_aftertax)
    binned_diff_cases = [
        ('large_income_bins', 'combined', [
            0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.27,
            np.nan
        ]),
        ('webapp_income_bins', 'iitax', [
            0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.30,
            0.08, 0.07, np.nan
        ]),
        ('small_income_bins', 'iitax', [
            0.00, 0.01, 0.02, 0.16, 0.64, 0.82, 0.87, 0.92, 1.10, 1.15,
            1.04, 0.78, 0.30, 0.08, 0.09, 0.07, 0.05, 0.02, 0.00, np.nan
        ]),
    ]
    for bins, tax, want in binned_diff_cases:
        table = create_difference_table(calc1.records,
                                        calc2.records,
                                        groupby=bins,
                                        income_measure='expanded_income',
                                        tax_to_diff=tax)
        assert isinstance(table, pd.DataFrame)
        assert np.allclose(table['perc_aftertax'],
                           want,
                           atol=0.005,
                           rtol=0.0,
                           equal_nan=True)

    table = create_difference_table(calc1.records,
                                    calc2.records,
                                    groupby='weighted_deciles',
                                    income_measure='expanded_income',
                                    tax_to_diff='combined')
    assert isinstance(table, pd.DataFrame)
    # (column, expected values, absolute tolerance, NaN matches NaN)
    decile_diff_checks = [
        ('tot_change', [
            14931, 276555, 7728872, 22552703, 34008512, 50233787, 76811377,
            111167087, 123226970, 111414038, 537434832, 66560891, 39571078,
            5282069
        ], 0.5, False),
        ('share_of_change', [
            0.00, 0.05, 1.44, 4.20, 6.33, 9.35, 14.29, 20.68, 22.93, 20.73,
            100.00, 12.38, 7.36, 0.98
        ], 0.005, False),
        ('perc_aftertax', [
            0.00, 0.02, 0.35, 0.79, 0.89, 0.97, 1.11, 1.18, 0.91, 0.50,
            np.nan, 0.70, 0.37, 0.06
        ], 0.005, True),
        ('pc_aftertaxinc', [
            -0.00, -0.02, -0.35, -0.79, -0.89, -0.97, -1.11, -1.18, -0.91,
            -0.50, np.nan, -0.70, -0.37, -0.06
        ], 0.005, True),
    ]
    for column, want, tolerance, nan_ok in decile_diff_checks:
        assert np.allclose(table[column],
                           want,
                           atol=tolerance,
                           rtol=0.0,
                           equal_nan=nan_ok)

    with pytest.raises(ValueError):
        create_difference_table(calc1.records,
                                calc2.records,
                                groupby='bad_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    # test creating various distribution tables

    table = create_distribution_table(calc2.records,
                                      groupby='weighted_deciles',
                                      income_measure='expanded_income',
                                      result_type='weighted_sum')
    assert isinstance(table, pd.DataFrame)
    want = [
        -8851215, -99666120, -123316561, -85895787, -47357458, 207462144,
        443391189, 978487989, 1709504845, 7631268907, 10605027933,
        4171055704, 2751003155, 709210048
    ]
    assert np.allclose(table['iitax'], want, atol=0.5, rtol=0.0)
    # remaining columns are converted to plain lists before comparison
    decile_dist_checks = [
        ('num_returns_ItemDed', [
            1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788, 131260,
            146001, 583832, 75279, 56819, 13903
        ]),
        ('expanded_income', [
            158456013, 1351981790, 2383726863, 3408544081, 4569232020,
            6321944661, 8520304098, 11817197884, 17299173380, 41117720202,
            96948280992, 21687950798, 15093608351, 4336161053
        ]),
        ('aftertax_income', [
            147367698, 1354827269, 2351611947, 3192405234, 4157431713,
            5454468907, 7125788590, 9335613303, 13417244946, 29691084873,
            76227844481, 15608893056, 10854804442, 3227387375
        ]),
    ]
    for column, want in decile_dist_checks:
        assert np.allclose(table[column].tolist(),
                           want,
                           atol=0.5,
                           rtol=0.0)

    table = create_distribution_table(calc2.records,
                                      groupby='webapp_income_bins',
                                      income_measure='expanded_income',
                                      result_type='weighted_sum')
    assert isinstance(table, pd.DataFrame)
    want = [
        -103274, -83144506, -152523834, -129881470, 85802556, 255480678,
        832529135, 1066963515, 3023956558, 2876331264, 1008672459,
        1820944852, 10605027933
    ]
    assert np.allclose(table['iitax'], want, atol=0.5, rtol=0.0)
    want = [
        0, 1202, 22654, 31665, 30547, 49851, 124786, 97349, 160147, 56806,
        5803, 3023, 583832
    ]
    assert np.allclose(table['num_returns_ItemDed'].tolist(),
                       want,
                       atol=0.5,
                       rtol=0.0)

    # a copy of expanded_income stored under another attribute name can
    # also serve as the income measure
    calc2.records.expanded_income_baseline = calc2.records.expanded_income
    table = create_distribution_table(
        calc2.records,
        groupby='webapp_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum'
    )
    assert isinstance(table, pd.DataFrame)

    # invalid result_type and invalid groupby are both rejected
    bad_dist_args = (
        {'groupby': 'small_income_bins', 'result_type': 'bad_result_type'},
        {'groupby': 'bad_bins', 'result_type': 'weighted_sum'},
    )
    for bad in bad_dist_args:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records,
                                      groupby=bad['groupby'],
                                      income_measure='expanded_income',
                                      result_type=bad['result_type'])
Esempio n. 14
0
def test_create_tables(cps_subsample):
    """
    Compare difference- and distribution-table columns with expected values.

    A current-law calculator (calc1) and a calculator with a 2013 '_II_rt1'
    reform (calc2) are built from the cps_subsample fixture.  Selected
    columns of the tables derived from them are compared with hard-coded
    expected values.  Every column is checked before the test fails, and
    the actual values of each mismatching column are printed so that the
    expected lists can be updated from the test output.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # "<label> <column>" for every column that mismatches

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol.

        On a mismatch, record the column in failures and print the actual
        values, one per line, formatted with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83, -0.81,
           -0.73, -0.65, -0.18, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83, -0.81,
           -0.73, -0.65, -0.23, -0.09, -0.06, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.29, -0.07, -0.23, -0.78, -0.66, -0.74,
           -0.70, -0.83, -0.81, -0.73, -0.65, -0.23, -0.09, -0.08,
           -0.07, -0.05, -0.02, np.nan, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff', diff, 'tot_change',
          [0, 0, 1037894, 16199646, 25518793, 34455230, 49661093,
           62344194, 82290396, 90006817, 117415735, 101818106,
           580747904, 62408600, 33771695, 5637811],
          atol=0.51, fmt='{:.0f},')
    check('diff', diff, 'share_of_change',
          [0.00, 0.00, 0.18, 2.79, 4.39, 5.93, 8.55, 10.74, 14.17,
           15.50, 20.22, 17.53, 100.00, 10.75, 5.82, 0.97],
          atol=0.005, fmt='{:.2f},')
    check('diff', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.13, -0.65, -0.68, -0.71, -0.79, -0.80,
           -0.82, -0.71, -0.71, -0.30, -0.59, -0.55, -0.25, -0.06],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    check('dist', dist, 'iitax',
          [0, 0, -54678669, -64005792, -64426464, 32739840, 207396898,
           317535861, 575238615, 984782596, 1731373913, 7082515174,
           10748471972, 1622921432, 2217477146, 3242116596],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 0, 2561, 13268, 21368, 28377, 53186, 60433, 79779, 91010,
           117445, 128784, 596211, 63766, 51681, 13337],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'expanded_income',
          [0, 0, 836765692, 2661991174, 3978757611, 5306258004,
           7022134388, 8871843614, 11530190180, 14721635194,
           19860290487, 44177752076, 118967618420, 14296456955,
           16895894429, 12985400692],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'aftertax_income',
          [0, 0, 821526457, 2483359936, 3714540881, 4821394144,
           6200512981, 7763298300, 9921184240, 12527297334,
           16314596486, 33886371300, 98454082058, 11265497052,
           13416447851, 9204426396],
          atol=0.5, fmt='{:.0f},')

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    check('dist', dist, 'iitax',
          [0, 0, -43150804, -77526808, -64845122, 43303823, 225370761,
           723847940, 1098042284, 3264499170, 2808160213, 950296405,
           1820474110, 10748471972],
          atol=0.5, fmt='{:.0f},')
    check('dist', dist, 'num_returns_ItemDed',
          [0, 0, 1202, 13614, 27319, 33655, 50186, 116612, 103896,
           181192, 60527, 5126, 2882, 596211],
          atol=0.5, fmt='{:.0f},')

    assert not failures, (
        'table columns differ from expected values: ' + ', '.join(failures)
    )
Esempio n. 15
0
def test_create_tables(cps_subsample):
    """
    Compare difference- and distribution-table columns with expected values.

    A current-law calculator (calc1) and a calculator with a 2013 '_II_rt1'
    reform (calc2) are built from the cps_subsample fixture.  Selected
    columns of the tables derived from them are compared with hard-coded
    expected values.  Every column is checked before the test fails, and
    the actual values of each mismatching column are printed so that the
    expected lists can be updated from the test output.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # "<label> <column>" for every column that mismatches

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol.

        On a mismatch, record the column in failures and print the actual
        values, one per line, formatted with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xbin', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80,
           -0.75, -0.65, -0.23, -0.09, -0.06, -0.59],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xdec', diff, 'tot_change',
          [0, 0, 1219678, 15503037, 25922077, 35000592, 48336897,
           62637728, 79750078, 93136108, 116996252, 102458801,
           580961247, 63156380, 33664610, 5637811],
          atol=0.51, fmt='{:.0f},')
    check('diff xdec', diff, 'share_of_change',
          [0.00, 0.00, 0.21, 2.67, 4.46, 6.02, 8.32, 10.78, 13.73,
           16.03, 20.14, 17.64, 100.00, 10.87, 5.79, 0.97],
          atol=0.005, fmt='{:.2f},')
    check('diff xdec', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.15, -0.62, -0.70, -0.73, -0.78, -0.80,
           -0.80, -0.74, -0.71, -0.30, -0.59, -0.55, -0.25, -0.06],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    check('dist xdec', dist, 'iitax',
          [0, 0, -53644343, -65258622, -57617119, 37391333, 200879230,
           329784586, 553827330, 1015854407, 1731283600, 7090603505,
           10783103907, 1638192777, 2213960052, 3238450675],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'num_returns_ItemDed',
          [0, 0, 2561, 12610, 21936, 29172, 50890, 61563, 78247, 91823,
           118523, 128886, 596211, 63986, 51634, 13266],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'expanded_income',
          [0, 0, 835224673, 2639667638, 3940559051, 5286856071,
           6972849344, 8881099529, 11467767759, 14761195525,
           19832126806, 44213000235, 118830346631, 14399218059,
           16868648076, 12945134101],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'aftertax_income',
          [0, 0, 818813684, 2466000535, 3671150517, 4790979126,
           6173998985, 7754183496, 9907604744, 12510477225,
           16273592612, 33915377411, 98282178334, 11345456373,
           13400757263, 9169163776],
          atol=0.5, fmt='{:.0f},')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    check('dist xbin', dist, 'iitax',
          [0, 0, -42244205, -76727831, -62581860, 53797887, 217016689,
           723516183, 1108097059, 3272479928, 2818979541, 950296405,
           1820474110, 10783103907],
          atol=0.5, fmt='{:.0f},')
    check('dist xbin', dist, 'num_returns_ItemDed',
          [0, 0, 1202, 13614, 27272, 34407, 48265, 117225, 103319,
           181885, 61014, 5126, 2882, 596211],
          atol=0.5, fmt='{:.0f},')

    assert not failures, (
        'table columns differ from expected values: ' + ', '.join(failures)
    )
Esempio n. 16
0
def test_create_tables(cps_subsample):
    """
    Compare difference- and distribution-table columns with expected values.

    A current-law calculator (calc1) and a calculator with a 2013 '_II_rt1'
    reform (calc2) are built from the cps_subsample fixture.  Selected
    columns of the tables derived from them are compared with hard-coded
    expected values.  Every column is checked before the test fails, and
    the actual values of each mismatching column are printed so that the
    expected lists can be updated from the test output.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # "<label> <column>" for every column that mismatches

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol.

        On a mismatch, record the column in failures and print the actual
        values, one per line, formatted with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xbin', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.22, -0.77, -0.80, -0.56, -0.77, -0.69,
           -0.71, -0.67, -0.27, -0.11, -0.06, -0.58],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xdec', diff, 'tot_change',
          [0, 0, 241418460, 2474292614, 2770584237, 2535721686,
           4444363117, 5111483934, 6321945100, 8225913647, 10597074824,
           10234573879, 52957371499, 6137031947, 3513242382, 584299551],
          atol=0.51, fmt='{:.0f},')
    check('diff xdec', diff, 'share_of_change',
          [0.00, 0.00, 0.46, 4.67, 5.23, 4.79, 8.39, 9.65, 11.94,
           15.53, 20.01, 19.33, 100.00, 11.59, 6.63, 1.10],
          atol=0.005, fmt='{:.2f},')
    check('diff xdec', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.25, -0.95, -0.73, -0.53, -0.75, -0.71,
           -0.68, -0.72, -0.71, -0.34, -0.58, -0.61, -0.30, -0.07],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    check('dist xdec', dist, 'iitax',
          [0, 0, -1962728575, 1725493747, 4396953820, 6605728718,
           16774691083, 23860454276, 38350836962, 83963523110,
           150930070726, 732859528574, 1057504552440, 151607017873,
           234865455600, 346387055100],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'num_returns_ItemDed',
          [0, 0, 357019, 1448655, 2559613, 2513429, 4419624, 5275374,
           6222375, 7880642, 11147728, 13023015, 54847474, 6118072,
           5478575, 1426368],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'expanded_income',
          [0, 0, 105133510325, 290616204980, 413576297349, 517828725223,
           659857915773, 803218163892, 1042123266101, 1326558509787,
           1805622773921, 4048576203396, 11013111570748, 1281956155093,
           1515893182747, 1250726865556],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'aftertax_income',
          [0, 0, 97194414402, 259232286042, 375389146785, 475719938941,
           588002960320, 710495874184, 921012587826, 1129166044052,
           1488817328688, 2998268537784, 9043299119027, 999335257953,
           1151930187406, 847003092425],
          atol=0.5, fmt='{:.0f},')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    check('dist xbin', dist, 'iitax',
          [0, 0, -544908512, -27720182, 2473153905, 9469043966,
           17806661306, 56292468689, 88558244888, 298427035609,
           290639143539, 99528466942, 194882962290, 1057504552440],
          atol=0.5, fmt='{:.0f},')
    check('dist xbin', dist, 'num_returns_ItemDed',
          [0, 0, 60455, 1161281, 2323042, 3613216, 4759193, 10006287,
           8785946, 17093586, 6199555, 532925, 311987, 54847474],
          atol=0.5, fmt='{:.0f},')

    assert not failures, (
        'table columns differ from expected values: ' + ', '.join(failures)
    )
Esempio n. 17
0
def xtest_create_tables(pit_subsample):
    """
    Compare difference- and distribution-table columns with expected values.

    A current-law calculator (calc1) and a calculator with a 2017 '_rate2'
    reform (calc2) are built from the pit_subsample fixture.  Selected
    columns of the tables derived from them are compared with hard-coded
    expected values.  Every column is checked before the test fails, and
    the actual values of each mismatching column are printed so that the
    expected lists can be updated from the test output.

    NOTE(review): the 'x' name prefix presumably keeps pytest from
    collecting this function as a test -- confirm that is intended.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records(data=pit_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2017: {'_rate2': [0.06]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # "<label> <column>" for every column that mismatches

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol.

        On a mismatch, record the column in failures and print the actual
        values, one per line, formatted with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xbin', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.24, -0.79, -0.81, -0.55, -0.77, -0.69,
           -0.70, -0.67, -0.27, -0.11, -0.06, -0.58],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check('diff xdec', diff, 'tot_change',
          [0, 0, 254095629, 2544151690, 2826173357, 2539574809,
           4426339426, 5178198524, 6277367974, 8069273960, 10572653961,
           10269542170, 52957371499, 6188055374, 3497187245, 584299551],
          atol=0.51, fmt='{:.0f},')
    check('diff xdec', diff, 'share_of_change',
          [0.00, 0.00, 0.48, 4.80, 5.34, 4.80, 8.36, 9.78, 11.85,
           15.24, 19.96, 19.39, 100.00, 11.68, 6.60, 1.10],
          atol=0.005, fmt='{:.2f},')
    check('diff xdec', diff, 'pc_aftertaxinc',
          [np.nan, np.nan, -0.26, -0.96, -0.74, -0.52, -0.75, -0.71,
           -0.68, -0.71, -0.71, -0.34, -0.58, -0.61, -0.30, -0.07],
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    check('dist xdec', dist, 'iitax',
          [0, 0, -1818680093, 1971755805, 5010676892, 6746034269,
           17979713134, 26281107130, 35678824858, 82705314943,
           148818900147, 734130905355, 1057504552440, 152370476198,
           234667184101, 347093245055],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'num_returns_ItemDed',
          [0, 0, 357019, 1523252, 2463769, 2571339, 4513934, 5278763,
           6299826, 7713038, 11001450, 13125085, 54847474, 6188702,
           5498415, 1437967],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'expanded_income',
          [0, 0, 105927980655, 294023675202, 417068113194, 528376852001,
           658853731628, 818158430558, 1037838578149, 1324689584778,
           1788101751565, 4047642990187, 11020681687917, 1286581399758,
           1511884268254, 1249177322176],
          atol=0.5, fmt='{:.0f},')
    check('dist xdec', dist, 'aftertax_income',
          [0, 0, 97735000432, 261911925318, 378049091828, 485619641327,
           586208937799, 722979233740, 919208533243, 1130705156084,
           1473967098213, 2994484618211, 9050869236196, 1002450732282,
           1147596725957, 844437159972],
          atol=0.5, fmt='{:.0f},')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    check('dist xbin', dist, 'iitax',
          [0, 0, -544908512, -140959365, 3354006293, 9339571323,
           18473567840, 55206201916, 89276157367, 297973932010,
           290155554334, 99528466942, 194882962290, 1057504552440],
          atol=0.5, fmt='{:.0f},')
    check('dist xbin', dist, 'num_returns_ItemDed',
          [0, 0, 60455, 1107780, 2366845, 3460607, 4837397, 10090391,
           8850784, 17041135, 6187168, 532925, 311987, 54847474],
          atol=0.5, fmt='{:.0f},')

    assert not failures, (
        'table columns differ from expected values: ' + ', '.join(failures)
    )
Esempio n. 18
0
def test_create_tables(cps_subsample):
    # pylint: disable=too-many-statements,too-many-branches
    """
    Build difference and distribution tables for a 2013 _II_rt1 reform
    using the cps_subsample fixture and compare selected columns with
    hard-coded expected values.

    Every column is checked before the test fails, so a single run reports
    all of the mismatching columns; the actual values of each mismatching
    column are printed one per line.
    """
    # create a current-law Policy object and Calculator object calc1
    # NOTE(review): calc1 and calc2 are built from the same pol and rec
    # objects; this presumably relies on Calculator copying its arguments
    # -- confirm.
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # 'label column' entry for every mismatching column

    def check(label, table, tabcol, expected, atol, fmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol
        (relative tolerance is zero); on a mismatch, record the column in
        failures and print its actual values formatted with fmt.
        """
        actual = table[tabcol].values
        # cast to float so the comparison does not depend on column dtype
        if np.allclose(actual.astype('float'), expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65, -0.18,
        -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65, -0.23,
        -0.09, -0.06, -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.29, -0.07, -0.22, -0.80, -0.65, -0.74, -0.71, -0.81, -0.83,
        -0.74, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05, -0.02, 0.00, -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0, 855188, 15425829, 26212078, 33369237, 50208703, 63312937, 82312360,
        90711899, 117518598, 101779164, 581705993, 62142547, 33919755, 5716862
    ]
    check('diff', diff, 'tot_change', expected, 0.51, '{:.0f},')
    expected = [
        0.00, 0.15, 2.65, 4.51, 5.74, 8.63, 10.88, 14.15, 15.59, 20.20, 17.50,
        100.00, 10.68, 5.83, 0.98
    ]
    check('diff', diff, 'share_of_change', expected, 0.005, '{:.2f},')
    expected = [
        0.00, -0.11, -0.62, -0.71, -0.69, -0.81, -0.82, -0.82, -0.72, -0.71,
        -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    # test creating various distribution tables

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, -56140397, -67237556, -58897159, 17222017, 212673684, 328116256,
        573255089, 992965515, 1730626734, 7142993526, 10815577709, 1625179635,
        2241659962, 3276153930
    ]
    check('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [
        0, 1202, 13981, 21932, 27445, 52318, 62509, 79749, 91861, 117068,
        129463, 597527, 63940, 52137, 13387
    ]
    check('dist', dist, 'num_returns_ItemDed', expected, 0.5, '{:.0f},')
    expected = [
        0, 812766585, 2639118220, 3940557055, 5243088362, 6988752253,
        8827238879, 11605062543, 14729565181, 19894042635, 44374875397,
        119055067109, 14255277238, 17039539254, 13080058905
    ]
    check('dist', dist, 'expanded_income', expected, 0.5, '{:.0f},')
    expected = [
        0, 801755209, 2466382489, 3674186760, 4779876836, 6150380331,
        7701226391, 10000914935, 12515316309, 16352910962, 34006973974,
        98449924197, 11219604941, 13525917494, 9261451538
    ]
    check('dist', dist, 'aftertax_income', expected, 0.5, '{:.0f},')

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, -44670465, -79534586, -61791623, 34666275, 216487136, 742113595,
        1099657851, 3270948526, 2826393721, 962881064, 1848426216, 10815577709
    ]
    check('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [
        0, 1202, 13625, 27355, 33694, 50236, 116751, 104035, 181572, 60936,
        5196, 2924, 597527
    ]
    check('dist', dist, 'num_returns_ItemDed', expected, 0.5, '{:.0f},')

    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
def test_create_tables(cps_subsample):
    # pylint: disable=too-many-statements,too-many-branches
    """
    Build difference and distribution tables for a 2013 II_rt1 reform
    using the cps_subsample fixture and compare selected columns with
    hard-coded expected values (absolute tolerance 0.1).

    Every column is checked before the test fails, so a single run reports
    all of the mismatching columns; the actual values of each mismatching
    column are printed one per line.
    """
    # create a current-law Policy object and Calculator object calc1
    # NOTE(review): calc1 and calc2 are built from the same pol and rec
    # objects; this presumably relies on Calculator copying its arguments
    # -- confirm.
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # 'label column' entry for every mismatching column

    def check(label, table, tabcol, expected, equal_nan=False):
        """
        Compare table[tabcol] with expected using an absolute tolerance of
        0.1 (relative tolerance is zero); on a mismatch, record the column
        in failures and print its actual values to one decimal place.
        """
        actual = table[tabcol].values
        # cast to float so the comparison does not depend on column dtype
        if np.allclose(actual.astype('float'), expected,
                       atol=0.1, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print('{:.1f},'.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.2, -0.8, -0.8, -0.5, -0.8, -0.7, -0.7, -0.7,
        -0.3, -0.1, -0.1, -0.6,
    ]
    check('diff xbin', diff, 'pc_aftertaxinc', expected, equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.0, 0.0, 0.3, 2.5, 2.8, 2.5, 4.4, 5.2, 6.3, 8.1, 10.6, 10.3,
        53.0, 6.2, 3.5, 0.6,
    ]
    check('diff xdec', diff, 'tot_change', expected)
    expected = [
        0.0, 0.0, 0.5, 4.8, 5.3, 4.8, 8.3, 9.9, 11.8, 15.2, 19.9, 19.4,
        100.0, 11.7, 6.6, 1.1,
    ]
    check('diff xdec', diff, 'share_of_change', expected)
    expected = [
        np.nan, np.nan, -0.3, -1.0, -0.7, -0.5, -0.7, -0.7, -0.7, -0.7,
        -0.7, -0.3, -0.6, -0.6, -0.3, -0.1,
    ]
    check('diff xdec', diff, 'pc_aftertaxinc', expected, equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0.0, 0.0, -1.8, 2.0, 5.0, 6.7, 18.0, 26.1, 35.2, 82.7, 148.8,
        734.1, 1056.9, 152.4, 234.7, 347.1,
    ]
    check('dist xdec', dist, 'iitax', expected)
    expected = [
        0.0, 0.0, 0.4, 1.5, 2.5, 2.6, 4.5, 5.3, 6.3, 7.7, 11.0, 13.1,
        54.9, 6.2, 5.5, 1.4,
    ]
    check('dist xdec', dist, 'count_ItemDed', expected)
    expected = [
        0.0, 0.0, 105.9, 294.0, 417.1, 528.4, 658.9, 818.2, 1037.8,
        1324.7, 1788.1, 4047.6, 11020.7, 1286.6, 1511.9, 1249.2,
    ]
    check('dist xdec', dist, 'expanded_income', expected)
    expected = [
        0.0, 0.0, 97.7, 261.9, 378.1, 485.6, 586.2, 723.2, 919.6, 1130.7,
        1474.0, 2994.5, 9051.5, 1002.5, 1147.6, 844.4,
    ]
    check('dist xdec', dist, 'aftertax_income', expected)

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0.0, 0.0, -0.5, -0.1, 3.4, 9.3, 18.5, 54.6, 89.3, 298.0, 290.2,
        99.5, 194.9, 1056.9,
    ]
    check('dist xbin', dist, 'iitax', expected)
    expected = [
        0.0, 0.0, 0.1, 1.1, 2.4, 3.5, 4.8, 10.1, 8.8, 17.0, 6.2, 0.5,
        0.3, 54.9,
    ]
    check('dist xbin', dist, 'count_ItemDed', expected)

    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
Esempio n. 20
0
def test_create_tables(cps_subsample):
    # pylint: disable=too-many-statements,too-many-branches
    """
    Build difference and distribution tables for a 2013 II_rt1 reform
    using the cps_subsample fixture and compare selected columns with
    hard-coded expected values (absolute tolerance 0.1).

    Every column is checked before the test fails, so a single run reports
    all of the mismatching columns; the actual values of each mismatching
    column are printed one per line.
    """
    # create a current-law Policy object and Calculator object calc1
    # NOTE(review): calc1 and calc2 are built from the same pol and rec
    # objects; this presumably relies on Calculator copying its arguments
    # -- confirm.
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []  # 'label column' entry for every mismatching column

    def check(label, table, tabcol, expected, equal_nan=False):
        """
        Compare table[tabcol] with expected using an absolute tolerance of
        0.1 (relative tolerance is zero); on a mismatch, record the column
        in failures and print its actual values to one decimal place.
        """
        actual = table[tabcol].values
        # the float cast is applied uniformly to every column here
        if np.allclose(actual.astype('float'), expected,
                       atol=0.1, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print('{:.1f},'.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8, -0.7, -0.7, -0.7,
        -0.3, -0.1, -0.0, -0.6,
    ]
    check('diff xbin', diff, 'pc_aftertaxinc', expected, equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.0, 0.0, 0.0, 0.6, 2.9, 3.5, 4.4, 6.1, 6.5, 8.7, 12.0, 13.3,
        58.0, 7.7, 4.8, 0.8,
    ]
    check('diff xdec', diff, 'tot_change', expected)
    expected = [
        0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 7.6, 10.6, 11.1, 15.1, 20.7, 22.9,
        100.0, 13.2, 8.3, 1.4,
    ]
    check('diff xdec', diff, 'share_of_change', expected)
    expected = [
        np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8, -0.7, -0.7,
        -0.7, -0.3, -0.6, -0.7, -0.4, -0.1,
    ]
    check('diff xdec', diff, 'pc_aftertaxinc', expected, equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0.0, 0.0, -0.4, -4.1, -5.9, 8.0, 16.9, 29.0, 27.0, 71.4, 153.4,
        910.1, 1205.5, 159.4, 268.1, 482.7,
    ]
    check('dist xdec', dist, 'iitax', expected)
    expected = [
        0.0, 0.0, 0.0, 1.1, 2.6, 3.9, 4.7, 6.3, 6.5, 7.4, 11.3, 16.3,
        60.3, 7.4, 7.2, 1.7,
    ]
    check('dist xdec', dist, 'count_ItemDed', expected)
    expected = [
        0.0, -1.4, 30.7, 209.8, 388.8, 541.2, 679.1, 847.6, 1097.1,
        1430.7, 1978.3, 5007.6, 12209.4, 1410.9, 1765.5, 1831.2,
    ]
    check('dist xdec', dist, 'expanded_income', expected)
    expected = [
        0.0, -1.4, 29.0, 195.5, 363.0, 491.0, 612.2, 747.1, 980.6,
        1248.0, 1630.2, 3741.3, 10036.6, 1100.9, 1339.0, 1301.4,
    ]
    check('dist xdec', dist, 'aftertax_income', expected)

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0.0, 0.0, -1.3, -7.6, -1.2, 20.7, 26.3, 47.2, 95.5, 321.9, 324.0,
        64.8, 315.2, 1205.5,
    ]
    check('dist xbin', dist, 'iitax', expected)
    expected = [
        0.0, 0.0, 0.2, 1.8, 3.6, 5.9, 5.7, 10.2, 8.1, 17.7, 6.7, 0.3,
        0.1, 60.3,
    ]
    check('dist xbin', dist, 'count_ItemDed', expected)

    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
Esempio n. 21
0
def test_create_tables(cps_subsample):
    # pylint: disable=too-many-statements,too-many-branches
    """
    Build difference and distribution tables for a 2013 II_rt1 reform
    using the cps_subsample fixture and compare selected columns with
    hard-coded expected values (absolute tolerance 0.1).

    Every column of every table is checked before the test fails, so a
    single run reports all of the mismatching columns; the actual values
    of each mismatching column are printed one per line.
    """
    # create a current-law Policy object and Calculator object calc1
    # NOTE(review): calc1 and calc2 are built from the same pol and rec
    # objects; this presumably relies on Calculator copying its arguments
    # -- confirm.
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    def mismatched(label, table, checks):
        """
        Return a 'label column' string for each (column, expected,
        equal_nan) entry of checks whose values in table are not within
        0.1 of expected, printing the actual values of each such column.
        """
        assert isinstance(table, pd.DataFrame)
        bad = []
        for tabcol, expected, equal_nan in checks:
            actual = table[tabcol].values
            # cast to float so the comparison is independent of dtype
            if np.allclose(actual.astype('float'), expected,
                           atol=0.1, rtol=0.0, equal_nan=equal_nan):
                continue
            bad.append('{} {}'.format(label, tabcol))
            print(label, tabcol)
            for val in actual:
                print('{:.1f},'.format(val))
        return bad

    failures = []

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    failures += mismatched('diff xbin', diff, [
        ('pc_aftertaxinc',
         [np.nan, np.nan, -0.2, -0.8, -0.8, -0.5, -0.8, -0.7, -0.7, -0.7,
          -0.3, -0.1, -0.1, -0.6],
         True),
    ])

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    failures += mismatched('diff xdec', diff, [
        ('tot_change',
         [0.0, 0.0, 0.3, 2.5, 2.8, 2.5, 4.4, 5.2, 6.3, 8.1, 10.6, 10.3,
          53.0, 6.2, 3.5, 0.6],
         False),
        ('share_of_change',
         [0.0, 0.0, 0.5, 4.8, 5.3, 4.8, 8.3, 9.9, 11.8, 15.2, 19.9, 19.4,
          100.0, 11.7, 6.6, 1.1],
         False),
        ('pc_aftertaxinc',
         [np.nan, np.nan, -0.3, -1.0, -0.7, -0.5, -0.7, -0.7, -0.7, -0.7,
          -0.7, -0.3, -0.6, -0.6, -0.3, -0.1],
         True),
    ])

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    failures += mismatched('dist xdec', dist, [
        ('iitax',
         [0.0, 0.0, -1.8, 2.0, 5.0, 6.7, 18.0, 26.1, 35.2, 82.7, 148.8,
          734.1, 1056.9, 152.4, 234.7, 347.1],
         False),
        ('count_ItemDed',
         [0.0, 0.0, 0.4, 1.5, 2.5, 2.6, 4.5, 5.3, 6.3, 7.7, 11.0, 13.1,
          54.9, 6.2, 5.5, 1.4],
         False),
        ('expanded_income',
         [0.0, 0.0, 105.9, 294.0, 417.1, 528.4, 658.9, 818.2, 1037.8,
          1324.7, 1788.1, 4047.6, 11020.7, 1286.6, 1511.9, 1249.2],
         False),
        ('aftertax_income',
         [0.0, 0.0, 97.7, 261.9, 378.1, 485.6, 586.2, 723.2, 919.6,
          1130.7, 1474.0, 2994.5, 9051.5, 1002.5, 1147.6, 844.4],
         False),
    ])

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    failures += mismatched('dist xbin', dist, [
        ('iitax',
         [0.0, 0.0, -0.5, -0.1, 3.4, 9.3, 18.5, 54.6, 89.3, 298.0, 290.2,
          99.5, 194.9, 1056.9],
         False),
        ('count_ItemDed',
         [0.0, 0.0, 0.1, 1.1, 2.4, 3.5, 4.8, 10.1, 8.8, 17.0, 6.2, 0.5,
          0.3, 54.9],
         False),
    ])

    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
def test_create_tables(cps_subsample):
    # pylint: disable=too-many-statements,too-many-branches
    """
    Build difference and distribution tables for a 2013 II_rt1 reform
    using the cps_subsample fixture and compare selected columns with
    hard-coded expected values (absolute tolerance 0.1).

    Every column of every table is checked before the test fails, so a
    single run reports all of the mismatching columns; the actual values
    of each mismatching column are printed one per line.
    """
    # create a current-law Policy object and Calculator object calc1
    # NOTE(review): calc1 and calc2 are built from the same pol and rec
    # objects; this presumably relies on Calculator copying its arguments
    # -- confirm.
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    def mismatched(label, table, checks):
        """
        Return a 'label column' string for each (column, expected,
        equal_nan) entry of checks whose values in table are not within
        0.1 of expected, printing the actual values of each such column.
        """
        assert isinstance(table, pd.DataFrame)
        bad = []
        for tabcol, expected, equal_nan in checks:
            actual = table[tabcol].values
            # the float cast is applied uniformly to every column here
            if np.allclose(actual.astype('float'), expected,
                           atol=0.1, rtol=0.0, equal_nan=equal_nan):
                continue
            bad.append('{} {}'.format(label, tabcol))
            print(label, tabcol)
            for val in actual:
                print('{:.1f},'.format(val))
        return bad

    failures = []

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    failures += mismatched('diff xbin', diff, [
        ('pc_aftertaxinc',
         [0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8, -0.7, -0.7, -0.7,
          -0.3, -0.1, -0.0, -0.6],
         True),
    ])

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    failures += mismatched('diff xdec', diff, [
        ('tot_change',
         [0.0, 0.0, 0.0, 0.6, 2.7, 3.4, 4.2, 5.8, 6.3, 8.1, 11.5, 12.7,
          55.2, 7.2, 4.7, 0.8],
         False),
        ('share_of_change',
         [0.0, 0.0, 0.0, 1.0, 4.9, 6.1, 7.6, 10.5, 11.4, 14.7, 20.7, 23.0,
          100.0, 13.0, 8.5, 1.5],
         False),
        ('pc_aftertaxinc',
         [np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8, -0.7, -0.7,
          -0.7, -0.3, -0.6, -0.7, -0.4, -0.1],
         True),
    ])

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    failures += mismatched('dist xdec', dist, [
        ('iitax',
         [0.0, 0.0, -0.2, -3.5, -5.8, 9.0, 16.6, 28.6, 30.3, 70.2, 153.0,
          893.7, 1191.8, 145.6, 269.7, 478.4],
         False),
        ('count_ItemDed',
         [0.0, 0.0, 0.0, 1.0, 2.9, 4.2, 4.5, 6.0, 6.0, 7.5, 11.3, 15.6,
          59.0, 6.7, 7.2, 1.8],
         False),
        ('expanded_income',
         [0.0, -1.4, 28.1, 201.5, 377.6, 536.2, 662.2, 841.1, 1053.0,
          1400.7, 1923.1, 4956.7, 11978.6, 1374.6, 1743.5, 1838.6],
         False),
        ('aftertax_income',
         [0.0, -1.4, 26.3, 187.5, 353.1, 483.4, 596.6, 743.0, 934.6,
          1221.8, 1579.5, 3721.8, 9846.3, 1096.0, 1317.8, 1308.0],
         False),
    ])

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    failures += mismatched('dist xbin', dist, [
        ('iitax',
         [0.0, 0.0, -1.2, -7.0, 0.2, 23.4, 26.9, 52.7, 95.5, 305.5, 321.1,
          58.6, 316.2, 1191.8],
         False),
        ('count_ItemDed',
         [0.0, 0.0, 0.2, 2.1, 3.5, 6.4, 5.8, 9.9, 7.8, 16.4, 6.5, 0.4,
          0.1, 59.0],
         False),
    ])

    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
Esempio n. 23
0
def summary(df1, df2, mask):
    """
    Build the dictionary of summary-results DataFrames for a reform.

    df1 contains raw results for baseline plan
    df2 contains raw results for reform plan
    mask is the boolean array specifying records with reform-induced tax diffs

    returns dictionary of summary results DataFrames with these keys:
      'aggr_d', 'aggr_1', 'aggr_2':
          weighted totals of the iitax, payrolltax and combined columns
          (reform-minus-baseline difference, baseline, reform), each
          indexed by AGGR_ROW_NAMES
      'diff_{itax,ptax,comb}_{xdec,xbin}':
          difference tables grouped by 'weighted_deciles' (xdec) or by
          'standard_income_bins' (xbin)
      'dist{1,2}_{xdec,xbin}':
          distribution tables for baseline (1) and reform (2)
    Every *_xbin table has its first row removed (the negative-income bin).

    NOTE: the DataFrame returned by create_results_columns is written to
    in place (root-named columns are overwritten with their *_xdec/*_xbin
    variants); whether that is the caller's df2 object depends on
    create_results_columns -- TODO confirm.
    """
    # pylint: disable=too-many-statements,too-many-locals

    df2 = create_results_columns(df1, df2, mask)

    # (abbreviation used in summary keys, tax column name); the order must
    # line up with the row labels in AGGR_ROW_NAMES (defined elsewhere)
    taxes = [('itax', 'iitax'),
             ('ptax', 'payrolltax'),
             ('comb', 'combined')]

    # (df2 column suffix, groupby name, drop first table row?); the first
    # row of each standard_income_bins table is the negative-income bin
    groupings = [('_xdec', 'weighted_deciles', False),
                 ('_xbin', 'standard_income_bins', True)]

    summ = {}

    # tax difference totals between reform and baseline
    summ['aggr_d'] = pd.DataFrame(
        data=[((df2[tax + '_agg'] - df1[tax]) * df2['s006']).sum()
              for _, tax in taxes],
        index=AGGR_ROW_NAMES)

    # totals for baseline
    summ['aggr_1'] = pd.DataFrame(
        data=[(df1[tax] * df1['s006']).sum() for _, tax in taxes],
        index=AGGR_ROW_NAMES)

    # totals for reform
    summ['aggr_2'] = pd.DataFrame(
        data=[(df2[tax + '_agg'] * df2['s006']).sum() for _, tax in taxes],
        index=AGGR_ROW_NAMES)

    # create difference tables grouped by xdec and then by xbin
    for suffix, groupby, drop_first_row in groupings:
        for abbrev, tax in taxes:
            # create_difference_table reads the root-named tax column, so
            # point it at the variant that belongs to this grouping
            df2[tax] = df2[tax + suffix]
            diff_table = create_difference_table(
                df1, df2,
                groupby=groupby,
                income_measure='expanded_income',
                tax_to_diff=tax)
            if drop_first_row:
                diff_table.drop(diff_table.index[0], inplace=True)
            summ['diff_' + abbrev + suffix] = diff_table

    # create distribution tables grouped by xdec and then by xbin
    for suffix, groupby, drop_first_row in groupings:
        dist1 = create_distribution_table(
            df1, groupby=groupby,
            income_measure='expanded_income',
            result_type='weighted_sum')
        if drop_first_row:
            dist1.drop(dist1.index[0], inplace=True)
        summ['dist1' + suffix] = dist1

        # copy every suffixed df2 column onto its root name; the column
        # list is materialized first because the loop adds columns, and
        # only the trailing suffix is stripped so that a suffix string
        # occurring earlier in a column name is left intact
        suffixed_cols = [c for c in list(df2) if c.endswith(suffix)]
        for col in suffixed_cols:
            df2[col[:-len(suffix)]] = df2[col]
        # reform records are grouped by their baseline expanded income
        df2['expanded_income_baseline'] = df1['expanded_income']
        dist2 = create_distribution_table(
            df2, groupby=groupby,
            income_measure='expanded_income_baseline',
            result_type='weighted_sum')
        if drop_first_row:
            dist2.drop(dist2.index[0], inplace=True)
        summ['dist2' + suffix] = dist2

    # return dictionary of summary results
    return summ