예제 #1
0
def test_dist_table_sum_row(cps_subsample):
    """
    Check that the final row of the distribution table is numerically the
    same whether rows are grouped by 'standard_income_bins' or by
    'soi_agi_bins'.
    """
    records = Records.cps_constructor(data=cps_subsample)
    calculator = Calculator(policy=Policy(), records=records)
    calculator.calc_all()
    # one table per grouping, each built from a freshly requested
    # dataframe; only the last row of each table is kept
    final_rows = [
        create_distribution_table(calculator.distribution_table_dataframe(),
                                  grouping, 'expanded_income')[-1:]
        for grouping in ('standard_income_bins', 'soi_agi_bins')
    ]
    assert np.allclose(final_rows[0], final_rows[1])
예제 #2
0
def test_dist_table_sum_row(cps_subsample):
    """
    The last row of an 'expanded_income' distribution table must not
    depend on the grouping: compare 'standard_income_bins' against
    'soi_agi_bins'.
    """
    cps_records = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=cps_records)
    calc.calc_all()

    def last_row(groupby):
        """Return the last row of the table built with this grouping."""
        table = create_distribution_table(
            calc.distribution_table_dataframe(), groupby, 'expanded_income')
        return table[-1:]

    by_standard_bins = last_row('standard_income_bins')
    by_soi_bins = last_row('soi_agi_bins')
    assert np.allclose(by_standard_bins, by_soi_bins)
예제 #3
0
def test_dist_table_sum_row(pit_subsample, cit_crosssample):
    """
    Compare one row of the 'GTI' distribution table across two groupings:
    the last row of the 'standard_income_bins' table against the row
    fourth from the end of the 'weighted_deciles' table.
    """
    pit_records = Records(data=pit_subsample)
    cit_records = CorpRecords(data=cit_crosssample)
    calc = Calculator(policy=Policy(), records=pit_records,
                      corprecords=cit_records)
    calc.calc_all()
    # build the tables in a fixed order, each from a fresh dataframe
    tables = {
        groupby: create_distribution_table(
            calc.distribution_table_dataframe(), groupby, 'GTI')
        for groupby in ('standard_income_bins', 'weighted_deciles')
    }
    # NOTE(review): presumably the decile table ends with three extra
    # detail rows, which would put its all-units row at [-4:-3] -- confirm
    assert np.allclose(tables['standard_income_bins'][-1:],
                       tables['weighted_deciles'][-4:-3])
예제 #4
0
 def distribution_table(self, year, groupby, income_measure, calc):
     """
     Build a distribution table from the stored base or reform results.

     Parameters
     ----------
     year: key used to select the data from self.base_data or
         self.reform_data
     groupby: row grouping passed through to create_distribution_table
         options: 'weighted_deciles', 'standard_income_bins',
         'soi_agi_bins'
     income_measure: variable passed through to create_distribution_table
         to sort the rows
         options: 'expanded_income' or 'expanded_income_baseline'
     calc: which results to use, 'base' or 'reform' (any letter case)

     Returns
     -------
     the result of create_distribution_table (a DataFrame containing the
     distribution table)

     Raises
     ------
     ValueError if calc is neither 'base' nor 'reform'

     Notes
     -----
     The helper columns are assigned on the object looked up for the year,
     not on a copy of it.
     """
     which = calc.lower()
     if which not in ("base", "reform"):
         raise ValueError("calc must be either BASE or REFORM")
     data = self.base_data[year] if which == "base" else self.reform_data[year]
     # each helper column holds the weight s006 where the paired amount is
     # positive and zero elsewhere
     weights = data["s006"]
     count_columns = (
         ("num_returns_ItemDed", "c04470"),
         ("num_returns_StandardDed", "standard"),
         ("num_returns_AMT", "c09600"),
     )
     for column, amount in count_columns:
         data[column] = weights.where(data[amount] > 0., 0.)
     # the baseline income measure always comes from the base data, even
     # when the reform data are being tabulated
     if income_measure == "expanded_income_baseline":
         data["expanded_income_baseline"] = (
             self.base_data[year]["expanded_income"]
         )
     return create_distribution_table(data, groupby, income_measure)
예제 #5
0
def test_dist_table_sum_row(records_2009):
    """
    Check that the last row of the weighted-sum distribution table is the
    same for 'small_income_bins' and 'large_income_bins', and that a
    weighted-average table is returned as a DataFrame.
    """
    # default policy, i.e. no reform is implemented
    calc = Calculator(policy=Policy(), records=records_2009)
    calc.calc_all()
    # weighted-sum tables keyed by grouping, built small bins first
    sums = {
        bins: create_distribution_table(calc.records,
                                        groupby=bins,
                                        result_type='weighted_sum')
        for bins in ('small_income_bins', 'large_income_bins')
    }
    assert np.allclose(sums['small_income_bins'][-1:],
                       sums['large_income_bins'][-1:])
    averages = create_distribution_table(calc.records,
                                         groupby='small_income_bins',
                                         result_type='weighted_avg')
    assert isinstance(averages, pd.DataFrame)
예제 #6
0
def test_dist_table_sum_row(cps_subsample):
    """
    Check that the last row of the weighted-sum 'expanded_income' table is
    the same for 'small_income_bins' and 'large_income_bins', and that a
    weighted-average table is returned as a DataFrame.
    """
    cps_records = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=cps_records)
    calc.calc_all()

    def table(groupby, result_type):
        """Build an 'expanded_income' table from the calculator records."""
        return create_distribution_table(calc.records,
                                         groupby=groupby,
                                         income_measure='expanded_income',
                                         result_type=result_type)

    small_sums = table('small_income_bins', 'weighted_sum')
    large_sums = table('large_income_bins', 'weighted_sum')
    assert np.allclose(small_sums[-1:], large_sums[-1:])
    assert isinstance(table('small_income_bins', 'weighted_avg'),
                      pd.DataFrame)
예제 #7
0
def summary_dist_xdec(res, df1, df2):
    """
    Add decile-grouped distribution tables to the summary results.

    res is dictionary of summary-results DataFrames.
    df1 contains results variables for baseline policy.
    df2 contains results variables for reform policy.
    returns augmented dictionary of summary-results DataFrames, with the
    baseline table under 'dist1_xdec' and the reform table under
    'dist2_xdec'.

    The reform table is grouped by the baseline expanded income.  To do
    that, an 'expanded_income_baseline' column is temporarily added to df2;
    it is removed again before returning, even when building the reform
    table raises an exception, so the caller's df2 is not left modified.
    """
    # create distribution tables grouped by xdec
    res['dist1_xdec'] = create_distribution_table(
        df1, 'weighted_deciles', 'expanded_income')
    df2['expanded_income_baseline'] = df1['expanded_income']
    try:
        res['dist2_xdec'] = create_distribution_table(
            df2, 'weighted_deciles', 'expanded_income_baseline')
    finally:
        # always drop the temporary column from the caller's dataframe
        del df2['expanded_income_baseline']
    # return res dictionary
    return res
예제 #8
0
    def distribution_table(self,
                           year: int,
                           groupby: str,
                           income_measure: str,
                           calc: str,
                           pop_quantiles: bool = False) -> pd.DataFrame:
        """
        Build a distribution table from the stored base or reform results.

        Parameters
        ----------
        year: int
            key used to select the data from ``self.base_data`` or
            ``self.reform_data``
        groupby: str
            row grouping passed through to ``create_distribution_table``
            options: 'weighted_deciles', 'standard_income_bins',
            'soi_agi_bins'
        income_measure: str
            variable passed through to ``create_distribution_table`` to
            sort the rows
            options: 'expanded_income' or 'expanded_income_baseline'
        calc: str
            which results to use, can take either
            `'REFORM'` or `'BASE'` (any letter case)
        pop_quantiles: bool
            whether or not weighted_deciles contain equal number of
            tax units (False) or people (True)

        Returns
        -------
        table: Pandas DataFrame
            distribution table

        Raises
        ------
        ValueError
            if ``calc`` is neither 'base' nor 'reform'

        Notes
        -----
        The helper columns are assigned on the object looked up for the
        year, not on a copy of it.
        """
        source = calc.lower()
        if source not in ("base", "reform"):
            raise ValueError("calc must be either BASE or REFORM")
        data = (self.base_data if source == "base" else self.reform_data)[year]
        # the unit count is the weight s006, scaled by XTOT when quantiles
        # are population based
        data["count"] = (
            data["s006"] * data["XTOT"] if pop_quantiles else data["s006"]
        )
        # each helper column holds the count where the paired amount is
        # positive and zero elsewhere
        counts = data["count"]
        count_columns = (
            ("count_ItemDed", "c04470"),
            ("count_StandardDed", "standard"),
            ("count_AMT", "c09600"),
        )
        for column, amount in count_columns:
            data[column] = counts.where(data[amount] > 0., 0.)
        # the baseline income measure always comes from the base data, even
        # when the reform data are being tabulated
        if income_measure == "expanded_income_baseline":
            data["expanded_income_baseline"] = (
                self.base_data[year]["expanded_income"]
            )
        return create_distribution_table(data, groupby, income_measure,
                                         pop_quantiles)
예제 #9
0
def test_row_classifier(puf_1991, weights_1991):
    """
    Check that the weighted sum of weights (s006) in every row of the
    'webapp_income_bins' table is the same for the current-law calculator
    and for a reform calculator whose table is built with the current-law
    records passed as baseline_obj.
    """
    # current-law side
    base_policy = Policy()
    base_records = Records(data=puf_1991, weights=weights_1991,
                           start_year=2009)
    base_calc = Calculator(policy=base_policy, records=base_records)
    base_calc.calc_all()
    base_table = create_distribution_table(base_calc.records,
                                           groupby='webapp_income_bins',
                                           result_type='weighted_sum')
    base_s006 = base_table.s006
    # policy-reform side
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_ALD_StudentLoan_hc': [1]}})
    reform_records = Records(data=puf_1991, weights=weights_1991,
                             start_year=2009)
    reform_calc = Calculator(policy=reform_policy, records=reform_records)
    reform_calc.calc_all()
    reform_table = create_distribution_table(reform_calc.records,
                                             groupby='webapp_income_bins',
                                             result_type='weighted_sum',
                                             baseline_obj=base_calc.records)
    reform_s006 = reform_table.s006
    # use weighted sum of weights in each cell to check the classifier
    assert_array_equal(base_s006, reform_s006)
예제 #10
0
def test_create_tables(puf_1991, weights_1991):
    """
    Exercise create_difference_table and create_distribution_table:
    valid arguments must give a DataFrame, while invalid arguments, or
    calculators that are no longer in step after one of them has had
    increment_year() called, must raise ValueError.
    """
    # current-law Policy object and Calculator object calc1
    base_policy = Policy()
    base_records = Records(data=puf_1991, weights=weights_1991,
                           start_year=2009)
    calc1 = Calculator(policy=base_policy, records=base_records)
    calc1.calc_all()
    # policy-reform Policy object and Calculator object calc2
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt4': [0.56]}})
    reform_records = Records(data=puf_1991, weights=weights_1991,
                             start_year=2009)
    calc2 = Calculator(policy=reform_policy, records=reform_records)
    calc2.calc_all()

    def difference_table(groupby):
        """Difference table between calc1 and calc2 for this grouping."""
        return create_difference_table(calc1.records,
                                       calc2.records,
                                       groupby=groupby)

    def diffs_distribution_table():
        """Distribution table of calc2 measured against calc1."""
        return create_distribution_table(calc2.records,
                                         groupby='small_income_bins',
                                         result_type='weighted_sum',
                                         baseline_obj=calc1.records,
                                         diffs=True)

    # valid difference tables
    for groupby in ('large_income_bins', 'webapp_income_bins'):
        assert isinstance(difference_table(groupby), pd.DataFrame)
    # invalid grouping for a difference table
    with pytest.raises(ValueError):
        difference_table('bad_bins')
    # invalid result type, then invalid grouping, for a distribution table
    bad_arguments = (
        {'groupby': 'small_income_bins', 'result_type': 'bad_result_type'},
        {'groupby': 'bad_bins', 'result_type': 'weighted_sum'},
    )
    for kwargs in bad_arguments:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records, **kwargs)
    # valid distribution table of differences
    assert isinstance(diffs_distribution_table(), pd.DataFrame)
    # after advancing only calc1, both table builders must refuse
    calc1.increment_year()
    with pytest.raises(ValueError):
        difference_table('large_income_bins')
    with pytest.raises(ValueError):
        diffs_distribution_table()
예제 #11
0
def test_create_tables(cps_subsample):
    """
    Build difference and distribution tables for a current-law calculator
    (calc1) and a reform calculator (calc2) and compare selected columns
    with hard-coded expected values; invalid arguments must raise
    ValueError.
    """
    # pylint: disable=too-many-statements
    # current-law Policy object and Calculator object calc1
    records = Records.cps_constructor(data=cps_subsample)
    policy = Policy()
    calc1 = Calculator(policy=policy, records=records)
    calc1.calc_all()
    # policy-reform Calculator object calc2
    policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=policy, records=records)
    calc2.calc_all()

    def close(actual, expected, atol, equal_nan=False):
        """Absolute-tolerance comparison (relative tolerance is zero)."""
        return np.allclose(actual, expected,
                           atol=atol, rtol=0.0, equal_nan=equal_nan)

    def difference_table(groupby, tax_to_diff):
        """Difference table between calc1 and calc2 records."""
        return create_difference_table(calc1.records,
                                       calc2.records,
                                       groupby=groupby,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax_to_diff)

    def distribution_table(groupby,
                           income_measure='expanded_income',
                           result_type='weighted_sum'):
        """Distribution table built from calc2 records."""
        return create_distribution_table(calc2.records,
                                         groupby=groupby,
                                         income_measure=income_measure,
                                         result_type=result_type)

    # test creating various difference tables

    perc_aftertax_cases = (
        ('large_income_bins', 'combined', [
            0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.27,
            np.nan
        ]),
        ('webapp_income_bins', 'iitax', [
            0.00, 0.01, 0.41, 0.84, 0.92, 1.10, 1.15, 1.04, 0.78, 0.30,
            0.08, 0.07, np.nan
        ]),
        ('small_income_bins', 'iitax', [
            0.00, 0.01, 0.02, 0.16, 0.64, 0.82, 0.87, 0.92, 1.10, 1.15,
            1.04, 0.78, 0.30, 0.08, 0.09, 0.07, 0.05, 0.02, 0.00, np.nan
        ]),
    )
    for groupby, tax_to_diff, expected in perc_aftertax_cases:
        diff = difference_table(groupby, tax_to_diff)
        assert isinstance(diff, pd.DataFrame)
        assert close(diff['perc_aftertax'], expected,
                     atol=0.005, equal_nan=True)

    diff = difference_table('weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    # (column, expected values, absolute tolerance, NaNs compare equal)
    decile_diff_cases = (
        ('tot_change', [
            14931, 276555, 7728872, 22552703, 34008512, 50233787, 76811377,
            111167087, 123226970, 111414038, 537434832, 66560891, 39571078,
            5282069
        ], 0.5, False),
        ('share_of_change', [
            0.00, 0.05, 1.44, 4.20, 6.33, 9.35, 14.29, 20.68, 22.93, 20.73,
            100.00, 12.38, 7.36, 0.98
        ], 0.005, False),
        ('perc_aftertax', [
            0.00, 0.02, 0.35, 0.79, 0.89, 0.97, 1.11, 1.18, 0.91, 0.50,
            np.nan, 0.70, 0.37, 0.06
        ], 0.005, True),
        ('pc_aftertaxinc', [
            -0.00, -0.02, -0.35, -0.79, -0.89, -0.97, -1.11, -1.18, -0.91,
            -0.50, np.nan, -0.70, -0.37, -0.06
        ], 0.005, True),
    )
    for column, expected, atol, nan_equal in decile_diff_cases:
        assert close(diff[column], expected, atol=atol, equal_nan=nan_equal)

    with pytest.raises(ValueError):
        difference_table('bad_bins', 'iitax')

    # test creating various distribution tables

    dist = distribution_table('weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        -8851215, -99666120, -123316561, -85895787, -47357458, 207462144,
        443391189, 978487989, 1709504845, 7631268907, 10605027933, 4171055704,
        2751003155, 709210048
    ]
    assert close(dist['iitax'], expected, atol=0.5)
    decile_dist_cases = (
        ('num_returns_ItemDed', [
            1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788, 131260,
            146001, 583832, 75279, 56819, 13903
        ]),
        ('expanded_income', [
            158456013, 1351981790, 2383726863, 3408544081, 4569232020,
            6321944661, 8520304098, 11817197884, 17299173380, 41117720202,
            96948280992, 21687950798, 15093608351, 4336161053
        ]),
        ('aftertax_income', [
            147367698, 1354827269, 2351611947, 3192405234, 4157431713,
            5454468907, 7125788590, 9335613303, 13417244946, 29691084873,
            76227844481, 15608893056, 10854804442, 3227387375
        ]),
    )
    for column, expected in decile_dist_cases:
        assert close(dist[column].tolist(), expected, atol=0.5)

    dist = distribution_table('webapp_income_bins')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        -103274, -83144506, -152523834, -129881470, 85802556, 255480678,
        832529135, 1066963515, 3023956558, 2876331264, 1008672459, 1820944852,
        10605027933
    ]
    assert close(dist['iitax'], expected, atol=0.5)
    expected = [
        0, 1202, 22654, 31665, 30547, 49851, 124786, 97349, 160147, 56806,
        5803, 3023, 583832
    ]
    assert close(dist['num_returns_ItemDed'].tolist(), expected, atol=0.5)

    # give the records a baseline income measure equal to expanded_income
    calc2.records.expanded_income_baseline = calc2.records.expanded_income
    dist = distribution_table('webapp_income_bins',
                              income_measure='expanded_income_baseline')
    assert isinstance(dist, pd.DataFrame)

    with pytest.raises(ValueError):
        distribution_table('small_income_bins',
                           result_type='bad_result_type')

    with pytest.raises(ValueError):
        distribution_table('bad_bins')
예제 #12
0
def test_create_tables(cps_subsample):
    """
    Build difference and distribution tables for a current-law calculator
    (calc1) and a reform calculator (calc2) and compare selected columns
    with hard-coded expected values.

    Every column is checked before the test fails, and the actual values
    of each mismatching column are printed one per line, so that all
    differences can be read from a single run.  The test then fails with
    the list of mismatching columns.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # "<kind> <groupby> <column>" label of every column that mismatched
    failures = []

    def check(kind, groupby, table, tabcol, expected, atol, fmt,
              equal_nan=False):
        """Record and print table[tabcol] when it differs from expected."""
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {} {}'.format(kind, groupby, tabcol))
        print(kind, tabcol)
        for val in actual:
            print(fmt.format(val))

    def difference_table(groupby, tax_to_diff):
        """Difference table between calc1 and calc2 results."""
        return create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                       calc2.dataframe(DIFF_VARIABLES),
                                       groupby=groupby,
                                       income_measure='expanded_income',
                                       tax_to_diff=tax_to_diff)

    # test creating various difference tables

    pc_aftertaxinc_cases = (
        ('large_income_bins', 'combined', [
            np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80, -0.75,
            -0.65, -0.18, -0.59
        ]),
        ('standard_income_bins', 'iitax', [
            np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80, -0.75,
            -0.65, -0.23, -0.09, -0.06, -0.59
        ]),
        ('small_income_bins', 'iitax', [
            np.nan, np.nan, -0.30, -0.10, -0.24, -0.76, -0.67, -0.75, -0.69,
            -0.82, -0.80, -0.75, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05,
            -0.02, np.nan, -0.59
        ]),
    )
    for groupby, tax_to_diff, expected in pc_aftertaxinc_cases:
        diff = difference_table(groupby, tax_to_diff)
        assert isinstance(diff, pd.DataFrame)
        check('diff', groupby, diff, 'pc_aftertaxinc', expected,
              atol=0.005, fmt='{:.2f},', equal_nan=True)

    groupby = 'weighted_deciles'
    diff = difference_table(groupby, 'combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0, 0, 1219678, 15503037, 25922077, 35000592, 48336897, 62637728,
        79750078, 93136108, 116996252, 102458801, 580961247, 63156380,
        33664610, 5637811
    ]
    check('diff', groupby, diff, 'tot_change', expected,
          atol=0.51, fmt='{:.0f},')
    expected = [
        0.00, 0.00, 0.21, 2.67, 4.46, 6.02, 8.32, 10.78, 13.73, 16.03, 20.14,
        17.64, 100.00, 10.87, 5.79, 0.97
    ]
    check('diff', groupby, diff, 'share_of_change', expected,
          atol=0.005, fmt='{:.2f},')
    # pc_aftertaxinc is checked once here (the second, identical check
    # of the same column against the same values has been dropped)
    expected = [
        np.nan, np.nan, -0.15, -0.62, -0.70, -0.73, -0.78, -0.80, -0.80, -0.74,
        -0.71, -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check('diff', groupby, diff, 'pc_aftertaxinc', expected,
          atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    groupby = 'weighted_deciles'
    dvdf = calc2.distribution_table_dataframe()
    dvdf = add_quantile_table_row_variable(dvdf,
                                           'expanded_income',
                                           num_quantiles=10,
                                           decile_details=True)
    dist = create_distribution_table(dvdf,
                                     groupby=groupby,
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    decile_dist_cases = (
        ('iitax', [
            0, 0, -53644343, -65258622, -57617119, 37391333, 200879230,
            329784586, 553827330, 1015854407, 1731283600, 7090603505,
            10783103907, 1638192777, 2213960052, 3238450675
        ]),
        ('num_returns_ItemDed', [
            0, 0, 2561, 12610, 21936, 29172, 50890, 61563, 78247, 91823,
            118523, 128886, 596211, 63986, 51634, 13266
        ]),
        ('expanded_income', [
            0, 0, 835224673, 2639667638, 3940559051, 5286856071, 6972849344,
            8881099529, 11467767759, 14761195525, 19832126806, 44213000235,
            118830346631, 14399218059, 16868648076, 12945134101
        ]),
        ('aftertax_income', [
            0, 0, 818813684, 2466000535, 3671150517, 4790979126, 6173998985,
            7754183496, 9907604744, 12510477225, 16273592612, 33915377411,
            98282178334, 11345456373, 13400757263, 9169163776
        ]),
    )
    for tabcol, expected in decile_dist_cases:
        check('dist', groupby, dist, tabcol, expected,
              atol=0.5, fmt='{:.0f},')

    groupby = 'standard_income_bins'
    dist = create_distribution_table(calc2.distribution_table_dataframe(),
                                     groupby=groupby,
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    bins_dist_cases = (
        ('iitax', [
            0, 0, -42244205, -76727831, -62581860, 53797887, 217016689,
            723516183, 1108097059, 3272479928, 2818979541, 950296405,
            1820474110, 10783103907
        ]),
        ('num_returns_ItemDed', [
            0, 0, 1202, 13614, 27272, 34407, 48265, 117225, 103319, 181885,
            61014, 5126, 2882, 596211
        ]),
    )
    for tabcol, expected in bins_dist_cases:
        check('dist', groupby, dist, tabcol, expected,
              atol=0.5, fmt='{:.0f},')

    assert not failures, (
        'table columns differ from expected values: ' + '; '.join(failures)
    )
예제 #13
0
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    hard-coded expected values.

    calc1 is built from a default Policy object and calc2 is built after
    implement_reform has applied the `_II_rt1` reform to that Policy object.
    Every mismatching column is printed in full (in a form that can be pasted
    back into the expected lists) and the test fails once at the end, naming
    all of the columns that differed.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # labels of the table columns that did not match their expected values
    mismatches = []

    def check_column(label, table, tabcol, expected, atol, fmt,
                     equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol;
        on mismatch record the column and print its actual values with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected, atol=atol, rtol=0.0,
                       equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65, -0.18,
        -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.14, -0.58, -0.70, -0.71, -0.81, -0.83, -0.74, -0.65, -0.23,
        -0.09, -0.06, -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0.00, -0.29, -0.07, -0.22, -0.80, -0.65, -0.74, -0.71, -0.81, -0.83,
        -0.74, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05, -0.02, 0.00, -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0, 855188, 15425829, 26212078, 33369237, 50208703, 63312937, 82312360,
        90711899, 117518598, 101779164, 581705993, 62142547, 33919755, 5716862
    ]
    check_column('diff', diff, 'tot_change', expected,
                 atol=0.51, fmt='{:.0f},')
    expected = [
        0.00, 0.15, 2.65, 4.51, 5.74, 8.63, 10.88, 14.15, 15.59, 20.20, 17.50,
        100.00, 10.68, 5.83, 0.98
    ]
    check_column('diff', diff, 'share_of_change', expected,
                 atol=0.005, fmt='{:.2f},')
    # (this column was previously checked twice with identical expected
    # values; one check is sufficient)
    expected = [
        0.00, -0.11, -0.62, -0.71, -0.69, -0.81, -0.82, -0.82, -0.72, -0.71,
        -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, -56140397, -67237556, -58897159, 17222017, 212673684, 328116256,
        573255089, 992965515, 1730626734, 7142993526, 10815577709, 1625179635,
        2241659962, 3276153930
    ]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 1202, 13981, 21932, 27445, 52318, 62509, 79749, 91861, 117068,
        129463, 597527, 63940, 52137, 13387
    ]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 812766585, 2639118220, 3940557055, 5243088362, 6988752253,
        8827238879, 11605062543, 14729565181, 19894042635, 44374875397,
        119055067109, 14255277238, 17039539254, 13080058905
    ]
    check_column('dist', dist, 'expanded_income', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 801755209, 2466382489, 3674186760, 4779876836, 6150380331,
        7701226391, 10000914935, 12515316309, 16352910962, 34006973974,
        98449924197, 11219604941, 13525917494, 9261451538
    ]
    check_column('dist', dist, 'aftertax_income', expected,
                 atol=0.5, fmt='{:.0f},')

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, -44670465, -79534586, -61791623, 34666275, 216487136, 742113595,
        1099657851, 3270948526, 2826393721, 962881064, 1848426216, 10815577709
    ]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 1202, 13625, 27355, 33694, 50236, 116751, 104035, 181572, 60936,
        5196, 2924, 597527
    ]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')

    # fail once, after all columns have been checked and printed
    assert not mismatches, (
        'table columns differ from expected values: ' + ', '.join(mismatches)
    )
예제 #14
0
def dropq_summary(df1, df2, mask):
    """
    Produce the weighted totals plus the difference and distribution tables
    (grouped by weighted deciles and by webapp income bins) for one dropq run.

    df1 holds raw results for the standard plan X and X'
    df2 holds raw results for the user-specified plan (Plan Y)
    mask is the boolean mask where X and X' match

    Both frames are first passed through drop_records.  The return value is
    a 19-item tuple: the five decile tables (reform distribution, baseline
    distribution, iitax/payrolltax/combined differences), the same five
    tables grouped by bins, the three decile-based difference totals, the
    three baseline totals, and the three reform totals.
    """
    # pylint: disable=too-many-locals

    df1, df2 = drop_records(df1, df2, mask)

    def weighted_total(frame, column):
        """Return the sum of frame[column] weighted by frame['s006']."""
        return (frame[column] * frame['s006']).sum()

    def diff_table(groupby, res_col, diff_col, suffix, wsum):
        """Build one baseline-versus-reform difference table."""
        return dropq_diff_table(df1, df2,
                                groupby=groupby,
                                res_col=res_col,
                                diff_col=diff_col,
                                suffix=suffix,
                                wsum=wsum)

    deciles = 'weighted_deciles'
    bins = 'webapp_income_bins'

    # weighted totals of the reform-minus-baseline tax differences
    dec_sum = weighted_total(df2, 'tax_diff_dec')
    bin_sum = weighted_total(df2, 'tax_diff_bin')
    pr_dec_sum = weighted_total(df2, 'payrolltax_diff_dec')
    pr_bin_sum = weighted_total(df2, 'payrolltax_diff_bin')
    combined_dec_sum = weighted_total(df2, 'combined_diff_dec')
    combined_bin_sum = weighted_total(df2, 'combined_diff_bin')

    # weighted totals under the baseline
    sum_baseline = weighted_total(df1, 'iitax')
    pr_sum_baseline = weighted_total(df1, 'payrolltax')
    combined_sum_baseline = weighted_total(df1, 'combined')

    # weighted totals under the reform
    sum_reform = weighted_total(df2, 'iitax_dec')
    pr_sum_reform = weighted_total(df2, 'payrolltax_dec')
    combined_sum_reform = weighted_total(df2, 'combined_dec')

    # difference tables: for each tax, first by deciles and then by bins
    diffs_dec = diff_table(deciles, 'tax_diff', 'iitax', '_dec', dec_sum)
    diffs_bin = diff_table(bins, 'tax_diff', 'iitax', '_bin', bin_sum)
    pr_diffs_dec = diff_table(deciles, 'payrolltax_diff', 'payrolltax',
                              '_dec', pr_dec_sum)
    pr_diffs_bin = diff_table(bins, 'payrolltax_diff', 'payrolltax',
                              '_bin', pr_bin_sum)
    comb_diffs_dec = diff_table(deciles, 'combined_diff', 'combined',
                                '_dec', combined_dec_sum)
    comb_diffs_bin = diff_table(bins, 'combined_diff', 'combined',
                                '_bin', combined_bin_sum)

    # distribution tables: baseline (m1) and reform (m2) for each grouping
    m1_dec = create_distribution_table(
        df1, groupby=deciles, result_type='weighted_sum')
    m2_dec = dropq_dist_table(
        df2, groupby=deciles, result_type='weighted_sum', suffix='_dec')
    m1_bin = create_distribution_table(
        df1, groupby=bins, result_type='weighted_sum')
    m2_bin = dropq_dist_table(
        df2, groupby=bins, result_type='weighted_sum', suffix='_bin')

    decile_tables = (m2_dec, m1_dec, diffs_dec, pr_diffs_dec, comb_diffs_dec)
    bin_tables = (m2_bin, m1_bin, diffs_bin, pr_diffs_bin, comb_diffs_bin)
    totals = (dec_sum, pr_dec_sum, combined_dec_sum,
              sum_baseline, pr_sum_baseline, combined_sum_baseline,
              sum_reform, pr_sum_reform, combined_sum_reform)
    return decile_tables + bin_tables + totals
예제 #15
0
def summary(df1, df2, mask):
    """
    Assemble the summary results for a baseline-versus-reform comparison.

    df1 contains raw results for baseline plan
    df2 contains raw results for reform plan
    mask is the boolean array specifying records with reform-induced tax diffs

    Returns a dictionary of DataFrames holding the aggregate totals
    ('aggr_d', 'aggr_1', 'aggr_2'), the difference tables ('diff_*_xdec',
    'diff_*_xbin') and the distribution tables ('dist1_*', 'dist2_*').
    The first row of every standard_income_bins table is dropped.
    """
    # pylint: disable=too-many-statements,too-many-locals

    df2 = create_results_columns(df1, df2, mask)

    # (column name, short name used in the result keys) for each tax
    taxes = (('iitax', 'itax'), ('payrolltax', 'ptax'), ('combined', 'comb'))

    def aggregate_frame(values):
        """Wrap the three per-tax totals in a DataFrame."""
        return pd.DataFrame(data=values, index=AGGR_ROW_NAMES)

    def adopt_suffixed_columns(suffix):
        """
        Copy every df2 column whose name ends with suffix onto the name
        without the suffix, then attach the baseline expanded income.
        """
        suffixed = [name for name in list(df2) if name.endswith(suffix)]
        for name in suffixed:
            df2[name.replace(suffix, '')] = df2[name]
        df2['expanded_income_baseline'] = df1['expanded_income']

    results = {}

    # tax difference totals between reform and baseline
    results['aggr_d'] = aggregate_frame([
        ((df2[tax + '_agg'] - df1[tax]) * df2['s006']).sum()
        for tax, _ in taxes
    ])

    # totals for baseline
    results['aggr_1'] = aggregate_frame([
        (df1[tax] * df1['s006']).sum() for tax, _ in taxes
    ])

    # totals for reform
    results['aggr_2'] = aggregate_frame([
        (df2[tax + '_agg'] * df2['s006']).sum() for tax, _ in taxes
    ])

    # create difference tables grouped by xdec
    for tax, short in taxes:
        df2[tax] = df2[tax + '_xdec']
        results['diff_' + short + '_xdec'] = create_difference_table(
            df1, df2,
            groupby='weighted_deciles',
            income_measure='expanded_income',
            tax_to_diff=tax)

    # create difference tables grouped by xbin (removing negative-income bin)
    for tax, short in taxes:
        df2[tax] = df2[tax + '_xbin']
        table = create_difference_table(
            df1, df2,
            groupby='standard_income_bins',
            income_measure='expanded_income',
            tax_to_diff=tax)
        table.drop(table.index[0], inplace=True)
        results['diff_' + short + '_xbin'] = table

    # create distribution tables grouped by xdec
    results['dist1_xdec'] = create_distribution_table(
        df1,
        groupby='weighted_deciles',
        income_measure='expanded_income',
        result_type='weighted_sum')

    adopt_suffixed_columns('_xdec')
    results['dist2_xdec'] = create_distribution_table(
        df2,
        groupby='weighted_deciles',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum')

    # create distribution tables grouped by xbin (removing negative-income bin)
    table = create_distribution_table(
        df1,
        groupby='standard_income_bins',
        income_measure='expanded_income',
        result_type='weighted_sum')
    table.drop(table.index[0], inplace=True)
    results['dist1_xbin'] = table

    adopt_suffixed_columns('_xbin')
    table = create_distribution_table(
        df2,
        groupby='standard_income_bins',
        income_measure='expanded_income_baseline',
        result_type='weighted_sum')
    table.drop(table.index[0], inplace=True)
    results['dist2_xbin'] = table

    # return dictionary of summary results
    return results
예제 #16
0
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    hard-coded expected values (NaN entries are expected in some rows of
    the pc_aftertaxinc column).

    calc1 is built from a default Policy object and calc2 is built after
    implement_reform has applied the `_II_rt1` reform to that Policy object.
    Every mismatching column is printed in full (in a form that can be pasted
    back into the expected lists) and the test fails once at the end, naming
    all of the columns that differed.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # labels of the table columns that did not match their expected values
    mismatches = []

    def check_column(label, table, tabcol, expected, atol, fmt,
                     equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol;
        on mismatch record the column and print its actual values with fmt.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected, atol=atol, rtol=0.0,
                       equal_nan=equal_nan):
            return
        mismatches.append('{} {}'.format(label, tabcol))
        print(label, tabcol)
        for val in actual:
            print(fmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83, -0.81, -0.73,
        -0.65, -0.18, -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.14, -0.58, -0.71, -0.70, -0.83, -0.81, -0.73,
        -0.65, -0.23, -0.09, -0.06, -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.29, -0.07, -0.23, -0.78, -0.66, -0.74, -0.70,
        -0.83, -0.81, -0.73, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05, -0.02,
        np.nan, -0.59
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0, 0, 1037894, 16199646, 25518793, 34455230, 49661093, 62344194,
        82290396, 90006817, 117415735, 101818106, 580747904, 62408600,
        33771695, 5637811
    ]
    check_column('diff', diff, 'tot_change', expected,
                 atol=0.51, fmt='{:.0f},')
    expected = [
        0.00, 0.00, 0.18, 2.79, 4.39, 5.93, 8.55, 10.74, 14.17, 15.50, 20.22,
        17.53, 100.00, 10.75, 5.82, 0.97
    ]
    check_column('diff', diff, 'share_of_change', expected,
                 atol=0.005, fmt='{:.2f},')
    # (this column was previously checked twice with identical expected
    # values; one check is sufficient)
    expected = [
        np.nan, np.nan, -0.13, -0.65, -0.68, -0.71, -0.79, -0.80, -0.82,
        -0.71, -0.71, -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check_column('diff', diff, 'pc_aftertaxinc', expected,
                 atol=0.005, fmt='{:.2f},', equal_nan=True)

    # test creating various distribution tables

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, 0, -54678669, -64005792, -64426464, 32739840, 207396898,
        317535861, 575238615, 984782596, 1731373913, 7082515174, 10748471972,
        1622921432, 2217477146, 3242116596
    ]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 0, 2561, 13268, 21368, 28377, 53186, 60433, 79779, 91010, 117445,
        128784, 596211, 63766, 51681, 13337
    ]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 0, 836765692, 2661991174, 3978757611, 5306258004, 7022134388,
        8871843614, 11530190180, 14721635194, 19860290487, 44177752076,
        118967618420, 14296456955, 16895894429, 12985400692
    ]
    check_column('dist', dist, 'expanded_income', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 0, 821526457, 2483359936, 3714540881, 4821394144, 6200512981,
        7763298300, 9921184240, 12527297334, 16314596486, 33886371300,
        98454082058, 11265497052, 13416447851, 9204426396
    ]
    check_column('dist', dist, 'aftertax_income', expected,
                 atol=0.5, fmt='{:.0f},')

    dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, 0, -43150804, -77526808, -64845122, 43303823, 225370761, 723847940,
        1098042284, 3264499170, 2808160213, 950296405, 1820474110, 10748471972
    ]
    check_column('dist', dist, 'iitax', expected,
                 atol=0.5, fmt='{:.0f},')
    expected = [
        0, 0, 1202, 13614, 27319, 33655, 50186, 116612, 103896, 181192, 60527,
        5126, 2882, 596211
    ]
    check_column('dist', dist, 'num_returns_ItemDed', expected,
                 atol=0.5, fmt='{:.0f},')

    # fail once, after all columns have been checked and printed
    assert not mismatches, (
        'table columns differ from expected values: ' + ', '.join(mismatches)
    )
예제 #17
0
    def distribution_tables(self, calc, groupby, averages=False, scaling=True):
        """
        Get results from self and calc, sort them by GTI into table
        rows defined by groupby, compute grouped statistics, and
        return tables as a pair of Pandas dataframes.
        Note that the returned tables have consistent income groups (based
        on the self GTI) even though the baseline GTI in self and
        the reform GTI in calc are different.

        Parameters
        ----------
        calc : Calculator object or None
            typically represents the reform while self represents the baseline;
            if calc is None, the second returned table is None

        groupby : String object
            options for input: 'weighted_deciles', 'standard_income_bins'
            determines how the rows in resulting Pandas DataFrame are grouped

        averages : boolean
            specifies whether or not monetary table entries are aggregates or
            averages (default value of False implies entries are aggregates)

        scaling : boolean
            specifies whether or not monetary table entries are scaled to
            billions and rounded to three decimal places when averages=False,
            or when averages=True, to thousands and rounded to three decimal
            places.  Regardless of the value of averages, non-monetary table
            entries are scaled to millions and rounded to three decimal places
            (default value of True implies entries are scaled and rounded)

        Raises
        ------
        AssertionError
            if calc is neither None nor a Calculator object, if groupby is
            not one of the two supported options, or if calc differs from
            self in current_year, array_len, or 'weight' values

        Return and typical usage
        ------------------------
        dist1, dist2 = calc1.distribution_tables(calc2, 'weighted_deciles')
        OR
        dist1, _ = calc1.distribution_tables(None, 'weighted_deciles')
        (where calc1 is a baseline Calculator object
        and calc2 is a reform Calculator object).
        Each of the dist1 and optional dist2 is a distribution table as a
        Pandas DataFrame with DIST_TABLE_COLUMNS and groupby rows.
        NOTE: when groupby is 'weighted_deciles', the returned tables have 3
              extra rows containing top-decile detail consisting of statistics
              for the 0.90-0.95 quantile range (bottom half of top decile),
              for the 0.95-0.99 quantile range, and
              for the 0.99-1.00 quantile range (top one percent); and the
              returned table splits the bottom decile into filing units with
              negative (denoted by a 0-10n row label),
              zero (denoted by a 0-10z row label), and
              positive (denoted by a 0-10p row label) values of GTI.
        """

        # nested function used only by this method
        def have_same_income_measure(calc1, calc2):
            """
            Return true if calc1 and calc2 contain the same GTI;
            otherwise, return false.  (Note that "same" means nobody's
            GTI differs by more than one cent.)
            """
            im1 = calc1.array('GTI')
            im2 = calc2.array('GTI')
            return np.allclose(im1, im2, rtol=0.0, atol=0.01)

        # main logic of method
        assert calc is None or isinstance(calc, Calculator)
        assert groupby in ('weighted_deciles', 'standard_income_bins')
        if calc is not None:
            # check year and length before comparing arrays, so that an
            # incompatible calc is rejected by these asserts rather than by
            # an array comparison on arrays of different lengths
            assert calc.current_year == self.current_year
            assert calc.array_len == self.array_len
            assert np.allclose(self.array('weight'),
                               calc.array('weight'))  # rows in same order
        # baseline table is always grouped by the GTI held in self
        imeasure = 'GTI'
        var_dataframe = self.distribution_table_dataframe()
        dt1 = create_distribution_table(var_dataframe, groupby, imeasure,
                                        averages, scaling)
        del var_dataframe
        if calc is None:
            return (dt1, None)
        var_dataframe = calc.distribution_table_dataframe()
        if not have_same_income_measure(self, calc):
            # group the reform results by the baseline GTI so that both
            # tables use the same income groups
            imeasure = 'GTI_baseline'
            var_dataframe[imeasure] = self.array('GTI')
        dt2 = create_distribution_table(var_dataframe, groupby, imeasure,
                                        averages, scaling)
        del var_dataframe
        return (dt1, dt2)
예제 #18
0
def test_create_tables(cps_subsample):
    """
    Build a current-law Calculator and a policy-reform Calculator from the
    cps_subsample fixture, then check that the difference tables and
    distribution tables created from their records match stored expected
    values and that invalid arguments raise ValueError.
    """
    income = 'expanded_income'

    # calc1: current-law policy
    calc1 = Calculator(policy=Policy(),
                       records=Records.cps_constructor(data=cps_subsample))
    calc1.calc_all()

    # calc2: policy reform that changes _II_rt1 starting in 2013
    reform_policy = Policy()
    reform_policy.implement_reform({2013: {'_II_rt1': [0.15]}})
    calc2 = Calculator(policy=reform_policy,
                       records=Records.cps_constructor(data=cps_subsample))
    calc2.calc_all()

    # difference tables: (groupby, tax_to_diff, expected perc_aftertax)
    difference_cases = [
        ('large_income_bins', 'combined',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%', '1.15%',
          '1.04%', '0.78%', '0.27%', 'n/a']),
        ('webapp_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.41%', '0.84%', '0.92%', '1.10%', '1.15%',
          '1.04%', '0.78%', '0.30%', '0.08%', '0.07%', 'n/a']),
        ('small_income_bins', 'iitax',
         ['0.00%', '0.01%', '0.02%', '0.16%', '0.64%', '0.82%', '0.87%',
          '0.92%', '1.10%', '1.15%', '1.04%', '0.78%', '0.30%', '0.08%',
          '0.09%', '0.07%', '0.05%', '0.02%', '0.00%', 'n/a']),
        ('weighted_deciles', 'combined',
         ['0.00%', '0.02%', '0.35%', '0.79%', '0.89%', '0.97%', '1.11%',
          '1.18%', '0.91%', '0.50%', 'n/a']),
    ]
    for bins, tax, perc_expected in difference_cases:
        diff = create_difference_table(calc1.records,
                                       calc2.records,
                                       groupby=bins,
                                       income_measure=income,
                                       tax_to_diff=tax)
        assert isinstance(diff, pd.DataFrame)
        assert np.array_equal(diff['perc_aftertax'], perc_expected)

    # an unknown groupby value must be rejected
    with pytest.raises(ValueError):
        create_difference_table(calc1.records,
                                calc2.records,
                                groupby='bad_bins',
                                income_measure=income,
                                tax_to_diff='iitax')

    # distribution tables:
    # (groupby, expected iitax, expected num_returns_ItemDed)
    distribution_cases = [
        ('weighted_deciles',
         [-8851215, -99666120, -123316561, -85895787, -47357458,
          207462144, 443391189, 978487989, 1709504845, 7631268907,
          10605027933],
         [1202, 1688, 13506, 18019, 30130, 48244, 80994, 112788, 131260,
          146001, 583832]),
        ('webapp_income_bins',
         [-103274, -83144506, -152523834, -129881470, 85802556,
          255480678, 832529135, 1066963515, 3023956558, 2876331264,
          1008672459, 1820944852, 10605027933],
         [0, 1202, 22654, 31665, 30547, 49851, 124786, 97349, 160147,
          56806, 5803, 3023, 583832]),
    ]
    for bins, iitax_expected, itemded_expected in distribution_cases:
        dist = create_distribution_table(calc2.records,
                                         groupby=bins,
                                         income_measure=income,
                                         result_type='weighted_sum')
        assert isinstance(dist, pd.DataFrame)
        assert np.allclose(dist['iitax'], iitax_expected,
                           atol=0.5, rtol=0.0)
        assert np.allclose(dist['num_returns_ItemDed'].tolist(),
                           itemded_expected,
                           atol=0.5, rtol=0.0)

    # a table can also be grouped by an income measure attribute that is
    # added to the records object under the '_baseline' name
    calc2.records.expanded_income_baseline = calc2.records.expanded_income
    dist = create_distribution_table(calc2.records,
                                     groupby='webapp_income_bins',
                                     income_measure='expanded_income_baseline',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)

    # invalid result_type and invalid groupby must each raise ValueError
    invalid_cases = [
        ('small_income_bins', 'bad_result_type'),
        ('bad_bins', 'weighted_sum'),
    ]
    for bins, rtype in invalid_cases:
        with pytest.raises(ValueError):
            create_distribution_table(calc2.records,
                                      groupby=bins,
                                      income_measure=income,
                                      result_type=rtype)