예제 #1
0
def test_add_quantile_trow_var():
    """
    Exercise add_quantile_table_row_variable with 100 quantiles.

    Checks that every table_row label is an integer in 1..100 when
    weight_by_income_measure=False, that a table_row column is present
    when that argument is omitted, and that asking for decile_details
    together with 100 quantiles raises a ValueError.
    """
    num_quantiles = 100
    income_var = 'expanded_income'
    frame = pd.DataFrame(data=DATA, columns=[income_var, 's006', 'label'])
    # case 1: explicit weight_by_income_measure=False
    binned = add_quantile_table_row_variable(
        frame, income_var, num_quantiles,
        decile_details=False, weight_by_income_measure=False)
    valid_labels = set(range(1, num_quantiles + 1))
    for label in binned['table_row'].unique():
        assert label in valid_labels
    # case 2: weight_by_income_measure left at its default
    binned = add_quantile_table_row_variable(
        frame, income_var, num_quantiles, decile_details=False)
    assert 'table_row' in binned
    # case 3: decile_details=True with 100 quantiles is rejected
    with pytest.raises(ValueError):
        binned = add_quantile_table_row_variable(
            frame, income_var, num_quantiles, decile_details=True)
def test_add_quantile_trow_var():
    """
    Test add_quantile_table_row_variable using one hundred quantiles:
    labels must fall in 1..100, the result must contain a table_row
    column, and decile_details=True must raise ValueError.
    """
    column_names = ['expanded_income', 's006', 'label']
    dfx = pd.DataFrame(data=DATA, columns=column_names)
    allowed = set(range(1, 101))

    # labels produced without weighting by the income measure
    unweighted = add_quantile_table_row_variable(
        dfx, 'expanded_income', 100,
        decile_details=False,
        weight_by_income_measure=False,
    )
    for row_label in unweighted['table_row'].unique():
        assert row_label in allowed

    # default weighting still adds the table_row column
    defaulted = add_quantile_table_row_variable(
        dfx, 'expanded_income', 100,
        decile_details=False,
    )
    assert 'table_row' in defaulted

    # decile details cannot be combined with 100 quantiles
    with pytest.raises(ValueError):
        defaulted = add_quantile_table_row_variable(
            dfx, 'expanded_income', 100,
            decile_details=True,
        )
예제 #3
0
 def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
     """
     Add a 'nofuzz' column and one suffixed column per fuzzed variable
     to df2.

     bin_type must be 'dec', 'bin' or 'agg'.  For 'dec' and 'bin' the df2
     records are grouped by a temporary table_row variable built from
     imeasure; for 'agg' df2 itself is used ungrouped.  When do_fuzzing
     is True, nofuzz is the chooser transform of the grouped 'mask'
     column; otherwise nofuzz is one for every record.

     Each new column named col + suffix is computed as
     df2[col] * nofuzz - df1[col] * nofuzz + df1[col], so a nofuzz value
     of one keeps the df2 (post-reform) result and a nofuzz value of
     zero substitutes the df1 (pre-reform) result.

     NOTE(review): nothing is returned here; in the 'dec' and 'bin' cases
     df2 is rebound to the helper's return value, so whether the caller
     sees the new columns depends on that helper -- confirm.
     """
     # pylint: disable=too-many-arguments
     assert bin_type in ('dec', 'bin', 'agg')
     if bin_type == 'agg':
         grouped = df2
     else:
         if bin_type == 'dec':
             df2 = add_quantile_table_row_variable(
                 df2, imeasure, 10, decile_details=True)
         else:
             df2 = add_income_table_row_variable(
                 df2, imeasure, bins=STANDARD_INCOME_BINS)
         grouped = df2.groupby('table_row')
         # table_row is needed only to form the groups
         del df2['table_row']
     if not do_fuzzing:
         # never do any results fuzzing
         df2['nofuzz'] = np.ones(df2.shape[0], dtype=np.int8)
     else:
         df2['nofuzz'] = grouped['mask'].transform(chooser)
     for col in cols_to_fuzz:
         keep = df2['nofuzz']
         baseline = df1[col]
         df2[col + suffix] = df2[col] * keep - baseline * keep + baseline
예제 #4
0
 def write_decile_table(dfx, tfile, tkind='Totals'):
     """
     Write to tfile a decile table of kind tkind computed from dfx.

     The dfx rows are split into ten expanded_income quantile groups.
     Each table line holds the unweighted_sum of s006 followed by the
     weighted_sum of expanded_income, iitax, payrolltax, lumpsum_tax and
     combined for one group; a final line labelled ' A' holds the sums
     over all groups.  The first column is scaled by 1e-6 and the other
     columns by 1e-9 before being written.
     """
     dfx = add_quantile_table_row_variable(dfx, 'expanded_income', 10,
                                           decile_details=False,
                                           pop_quantiles=False,
                                           weight_by_income_measure=False)
     grouped = dfx.groupby('table_row', as_index=False)
     # one array per table column, kept in output order
     columns = [grouped.apply(unweighted_sum, 's006').values[:, 1]]
     for varname in ('expanded_income', 'iitax', 'payrolltax',
                     'lumpsum_tax', 'combined'):
         columns.append(grouped.apply(weighted_sum, varname).values[:, 1])
     # scaling factor applied to each column when it is written
     scales = (1e-6, 1e-9, 1e-9, 1e-9, 1e-9, 1e-9)
     # write decile table to text file
     title = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
     tfile.write(title.format(tkind))
     header_fmt = '{}{}{}{}{}{}\n'
     names = ('    Returns', '    ExpInc', '    IncTax',
              '    PayTax', '     LSTax', '    AllTax')
     tfile.write(header_fmt.format(*names))
     units = ('       (#m)', '      ($b)', '      ($b)',
              '      ($b)', '      ($b)', '      ($b)')
     tfile.write(header_fmt.format(*units))
     number_fmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
     for decile in range(10):
         cells = [col[decile] * scale
                  for col, scale in zip(columns, scales)]
         tfile.write('{:2d}'.format(decile) + number_fmt.format(*cells))
     totals = [col.sum() * scale for col, scale in zip(columns, scales)]
     tfile.write(' A' + number_fmt.format(*totals))
     # drop the references to the grouped data before collecting garbage
     del grouped
     del columns
     gc.collect()
예제 #5
0
 def write_decile_table(dfx, tfile, tkind='Totals'):
     """
     Write the tkind decile table for the dfx DataFrame to tfile.

     Rows of dfx are assigned to ten expanded_income quantile groups.
     For each group the table shows the unweighted_sum of s006 (scaled
     by 1e-6) and the weighted_sum of expanded_income, iitax, payrolltax,
     lumpsum_tax and combined (each scaled by 1e-9).  The last line,
     labelled ' A', shows the sum of every column over all groups.
     """
     dfx = add_quantile_table_row_variable(dfx, 'expanded_income', 10,
                                           decile_details=False,
                                           weight_by_income_measure=False)
     groups = dfx.groupby('table_row', as_index=False)
     returns = groups.apply(unweighted_sum, 's006')
     amounts = []
     for varname in ('expanded_income', 'iitax', 'payrolltax',
                     'lumpsum_tax', 'combined'):
         amounts.append(groups.apply(weighted_sum, varname))
     # write decile table to text file
     heading = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
     tfile.write(heading.format(tkind))
     textfmt = '{}{}{}{}{}{}\n'
     tfile.write(textfmt.format('    Returns',
                                '    ExpInc',
                                '    IncTax',
                                '    PayTax',
                                '     LSTax',
                                '    AllTax'))
     tfile.write(textfmt.format('       (#m)',
                                '      ($b)',
                                '      ($b)',
                                '      ($b)',
                                '      ($b)',
                                '      ($b)'))
     numfmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
     # one line per quantile group
     for decile in range(0, 10):
         values = [returns[decile] * 1e-6]
         for amount in amounts:
             values.append(amount[decile] * 1e-9)
         tfile.write('{:2d}'.format(decile) + numfmt.format(*values))
     # line with the totals over all groups
     values = [returns.sum() * 1e-6]
     for amount in amounts:
         values.append(amount.sum() * 1e-9)
     tfile.write(' A' + numfmt.format(*values))
     # release the grouped results before collecting garbage
     del groups
     del returns
     del amounts
     del amount
     gc.collect()
예제 #6
0
def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    hard-coded expected values.

    A current-law Calculator (calc1) and a reform Calculator (calc2) are
    built from cps_subsample.  Every column that does not match is
    printed in a form that can be pasted back into the expected lists,
    and the test fails once at the end, so a single run reports all of
    the mismatching columns.
    """
    # pylint: disable=too-many-statements,too-many-branches
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {2013: {'_II_rt1': [0.15]}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    # names of the table columns whose values differ from expectations
    failures = []

    def check(kind, table, tabcol, expected, atol, valfmt, equal_nan=False):
        """
        Compare table[tabcol] with expected using absolute tolerance atol;
        on a mismatch print the actual values using valfmt and remember
        the failure.  kind is the label ('diff' or 'dist') that is printed
        in front of the column name.
        """
        actual = table[tabcol].values
        if np.allclose(actual, expected,
                       atol=atol, rtol=0.0, equal_nan=equal_nan):
            return
        failures.append('{} {}'.format(kind, tabcol))
        print(kind, tabcol)
        for val in actual:
            print(valfmt.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='large_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80, -0.75, -0.65,
        -0.18, -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='standard_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.16, -0.57, -0.72, -0.69, -0.82, -0.80, -0.75, -0.65,
        -0.23, -0.09, -0.06, -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='small_income_bins',
                                   income_measure='expanded_income',
                                   tax_to_diff='iitax')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        np.nan, np.nan, -0.30, -0.10, -0.24, -0.76, -0.67, -0.75, -0.69, -0.82,
        -0.80, -0.75, -0.65, -0.23, -0.09, -0.08, -0.07, -0.05, -0.02, np.nan,
        -0.59
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   groupby='weighted_deciles',
                                   income_measure='expanded_income',
                                   tax_to_diff='combined')
    assert isinstance(diff, pd.DataFrame)
    expected = [
        0, 0, 1219678, 15503037, 25922077, 35000592, 48336897, 62637728,
        79750078, 93136108, 116996252, 102458801, 580961247, 63156380,
        33664610, 5637811
    ]
    check('diff', diff, 'tot_change', expected, 0.51, '{:.0f},')
    expected = [
        0.00, 0.00, 0.21, 2.67, 4.46, 6.02, 8.32, 10.78, 13.73, 16.03, 20.14,
        17.64, 100.00, 10.87, 5.79, 0.97
    ]
    check('diff', diff, 'share_of_change', expected, 0.005, '{:.2f},')
    expected = [
        np.nan, np.nan, -0.15, -0.62, -0.70, -0.73, -0.78, -0.80, -0.80, -0.74,
        -0.71, -0.30, -0.59, -0.55, -0.25, -0.06
    ]
    check('diff', diff, 'pc_aftertaxinc', expected, 0.005, '{:.2f},',
          equal_nan=True)

    # test creating various distribution tables

    dvdf = calc2.distribution_table_dataframe()
    dvdf = add_quantile_table_row_variable(dvdf,
                                           'expanded_income',
                                           num_quantiles=10,
                                           decile_details=True)
    dist = create_distribution_table(dvdf,
                                     groupby='weighted_deciles',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, 0, -53644343, -65258622, -57617119, 37391333, 200879230, 329784586,
        553827330, 1015854407, 1731283600, 7090603505, 10783103907, 1638192777,
        2213960052, 3238450675
    ]
    check('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [
        0, 0, 2561, 12610, 21936, 29172, 50890, 61563, 78247, 91823, 118523,
        128886, 596211, 63986, 51634, 13266
    ]
    check('dist', dist, 'num_returns_ItemDed', expected, 0.5, '{:.0f},')
    expected = [
        0, 0, 835224673, 2639667638, 3940559051, 5286856071, 6972849344,
        8881099529, 11467767759, 14761195525, 19832126806, 44213000235,
        118830346631, 14399218059, 16868648076, 12945134101
    ]
    check('dist', dist, 'expanded_income', expected, 0.5, '{:.0f},')
    expected = [
        0, 0, 818813684, 2466000535, 3671150517, 4790979126, 6173998985,
        7754183496, 9907604744, 12510477225, 16273592612, 33915377411,
        98282178334, 11345456373, 13400757263, 9169163776
    ]
    check('dist', dist, 'aftertax_income', expected, 0.5, '{:.0f},')

    dist = create_distribution_table(calc2.distribution_table_dataframe(),
                                     groupby='standard_income_bins',
                                     income_measure='expanded_income',
                                     result_type='weighted_sum')
    assert isinstance(dist, pd.DataFrame)
    expected = [
        0, 0, -42244205, -76727831, -62581860, 53797887, 217016689, 723516183,
        1108097059, 3272479928, 2818979541, 950296405, 1820474110, 10783103907
    ]
    check('dist', dist, 'iitax', expected, 0.5, '{:.0f},')
    expected = [
        0, 0, 1202, 13614, 27272, 34407, 48265, 117225, 103319, 181885, 61014,
        5126, 2882, 596211
    ]
    check('dist', dist, 'num_returns_ItemDed', expected, 0.5, '{:.0f},')

    # fail once, naming every table column that did not match
    assert not failures, \
        'unexpected table values in: ' + ', '.join(failures)
예제 #7
0
def fuzzed(df1, df2, reform_affected, table_row_type):
    """
    Create fuzzed df2 dataframe and corresponding unfuzzed df1 dataframe.

    Parameters
    ----------
    df1: Pandas DataFrame
        contains results variables for the baseline policy, which are not
        changed by this function

    df2: Pandas DataFrame
        contains results variables for the reform policy, which are not
        changed by this function

    reform_affected: boolean numpy array (not changed by this function)
        True for filing units with a reform-induced combined tax difference;
        otherwise False

    table_row_type: string
        valid values are 'aggr', 'xbin', and 'xdec'

    Returns
    -------
    df1, df2: Pandas DataFrames
        where copied df2 is fuzzed to maintain data privacy and
        where copied df1 has same filing unit order as has the fuzzed df2

    Notes
    -----
    The pandas 'mode.chained_assignment' option is switched off only
    while the groups are being fuzzed; the value it had before the call
    is restored afterwards, even if an exception is raised.
    """
    assert table_row_type in ('aggr', 'xbin', 'xdec')
    assert len(df1.index) == len(df2.index)
    assert reform_affected.size == len(df1.index)
    # work on copies so that the caller's objects are left untouched
    df1 = copy.deepcopy(df1)
    df2 = copy.deepcopy(df2)
    # add copy of reform_affected to df2
    df2['reform_affected'] = copy.deepcopy(reform_affected)
    # construct table rows, for which filing units in each row must be fuzzed;
    # df2 rows are always assigned using the baseline expanded_income so
    # that df1 and df2 are split into table rows by the same variable
    if table_row_type == 'xbin':
        df1 = add_income_table_row_variable(df1, 'expanded_income',
                                            STANDARD_INCOME_BINS)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_income_table_row_variable(df2, 'expanded_income_baseline',
                                            STANDARD_INCOME_BINS)
        del df2['expanded_income_baseline']
    elif table_row_type == 'xdec':
        df1 = add_quantile_table_row_variable(df1,
                                              'expanded_income',
                                              10,
                                              decile_details=True)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_quantile_table_row_variable(df2,
                                              'expanded_income_baseline',
                                              10,
                                              decile_details=True)
        del df2['expanded_income_baseline']
    elif table_row_type == 'aggr':
        # a single table row containing every filing unit
        df1['table_row'] = np.ones(reform_affected.shape, dtype=int)
        df2['table_row'] = df1['table_row']
    gdf1 = df1.groupby('table_row', sort=False)
    gdf2 = df2.groupby('table_row', sort=False)
    del df1['table_row']
    del df2['table_row']
    # fuzz up to NUM_TO_FUZZ filing units randomly chosen in each group
    # (or table row), where fuzz means to replace the reform (2) results
    # with the baseline (1) results for each chosen filing unit
    group_list = []
    with pd.option_context('mode.chained_assignment', None):
        for name, group2 in gdf2:
            # positions, within this group, of the reform-affected units
            indices = np.where(group2['reform_affected'])
            num = min(len(indices[0]), NUM_TO_FUZZ)
            if num > 0:
                choices = np.random.choice(
                    indices[0],  # pylint: disable=no-member
                    size=num,
                    replace=False)
                group1 = gdf1.get_group(name)
                # the same position is used in both groups, which relies on
                # group1 and group2 listing filing units in the same order
                for idx in choices:
                    group2.iloc[idx] = group1.iloc[idx]
            group_list.append(group2)
        df2 = pd.concat(group_list)
        del df2['reform_affected']
    # reinstate index order of df1 and df2 and return
    df1.sort_index(inplace=True)
    df2.sort_index(inplace=True)
    return (df1, df2)
예제 #8
0
def summary(df1, df2, mask):
    """
    Return dictionary of summary results DataFrames.

    df1 contains raw results for baseline plan
    df2 contains raw results for reform plan
    mask is the boolean array specifying records with reform-induced tax diffs

    The returned dictionary has these keys:
      'aggr_d', 'aggr_1', 'aggr_2':
          s006-weighted totals of iitax, payrolltax and combined for the
          reform-minus-baseline difference, the baseline and the reform
      'diff_itax_xdec', 'diff_ptax_xdec', 'diff_comb_xdec':
          difference tables grouped by weighted_deciles
      'diff_itax_xbin', 'diff_ptax_xbin', 'diff_comb_xbin':
          difference tables grouped by standard_income_bins
      'dist1_xdec', 'dist2_xdec', 'dist1_xbin', 'dist2_xbin':
          baseline (1) and reform (2) distribution tables; the reform
          tables are grouped using the baseline expanded_income
    """
    # pylint: disable=too-many-statements,too-many-locals

    df2_xdec, df2_xbin, df2_aggr = create_results_columns(df1, df2, mask)
    df1_xdec = add_quantile_table_row_variable(df1,
                                               'expanded_income',
                                               10,
                                               decile_details=True)
    del df1_xdec['table_row']
    df1_xbin = add_income_table_row_variable(df1,
                                             'expanded_income',
                                             bins=STANDARD_INCOME_BINS)
    del df1_xbin['table_row']

    summ = {}

    # tax difference totals between reform and baseline
    tdiff = df2_aggr['iitax_agg'] - df1['iitax']
    aggr_itax_d = (tdiff * df2['s006']).sum()
    tdiff = df2_aggr['payrolltax_agg'] - df1['payrolltax']
    aggr_ptax_d = (tdiff * df2['s006']).sum()
    tdiff = df2_aggr['combined_agg'] - df1['combined']
    aggr_comb_d = (tdiff * df2['s006']).sum()
    aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
    summ['aggr_d'] = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)

    # totals for baseline
    aggr_itax_1 = (df1['iitax'] * df1['s006']).sum()
    aggr_ptax_1 = (df1['payrolltax'] * df1['s006']).sum()
    aggr_comb_1 = (df1['combined'] * df1['s006']).sum()
    aggr1 = [aggr_itax_1, aggr_ptax_1, aggr_comb_1]
    summ['aggr_1'] = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)

    # totals for reform
    aggr_itax_2 = (df2_aggr['iitax_agg'] * df2['s006']).sum()
    aggr_ptax_2 = (df2_aggr['payrolltax_agg'] * df2['s006']).sum()
    aggr_comb_2 = (df2_aggr['combined_agg'] * df2['s006']).sum()
    aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
    summ['aggr_2'] = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)

    # the raw inputs are not used below this point
    del df1
    del df2

    # create difference tables grouped by xdec; each tax variable in
    # df2_xdec is overwritten with its _xdec version before being diffed
    df2_xdec['iitax'] = df2_xdec['iitax_xdec']
    summ['diff_itax_xdec'] = \
        create_difference_table(df1_xdec, df2_xdec,
                                groupby='weighted_deciles',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')

    df2_xdec['payrolltax'] = df2_xdec['payrolltax_xdec']
    summ['diff_ptax_xdec'] = \
        create_difference_table(df1_xdec, df2_xdec,
                                groupby='weighted_deciles',
                                income_measure='expanded_income',
                                tax_to_diff='payrolltax')

    df2_xdec['combined'] = df2_xdec['combined_xdec']
    summ['diff_comb_xdec'] = \
        create_difference_table(df1_xdec, df2_xdec,
                                groupby='weighted_deciles',
                                income_measure='expanded_income',
                                tax_to_diff='combined')

    # create difference tables grouped by xbin; every xbin table pairs
    # df2_xbin with df1_xbin (not with the xdec baseline dataframe)
    df2_xbin['iitax'] = df2_xbin['iitax_xbin']
    diff_itax_xbin = \
        create_difference_table(df1_xbin, df2_xbin,
                                groupby='standard_income_bins',
                                income_measure='expanded_income',
                                tax_to_diff='iitax')
    summ['diff_itax_xbin'] = diff_itax_xbin

    df2_xbin['payrolltax'] = df2_xbin['payrolltax_xbin']
    diff_ptax_xbin = \
        create_difference_table(df1_xbin, df2_xbin,
                                groupby='standard_income_bins',
                                income_measure='expanded_income',
                                tax_to_diff='payrolltax')
    summ['diff_ptax_xbin'] = diff_ptax_xbin

    df2_xbin['combined'] = df2_xbin['combined_xbin']
    diff_comb_xbin = \
        create_difference_table(df1_xbin, df2_xbin,
                                groupby='standard_income_bins',
                                income_measure='expanded_income',
                                tax_to_diff='combined')
    summ['diff_comb_xbin'] = diff_comb_xbin

    # create distribution tables grouped by xdec
    summ['dist1_xdec'] = \
        create_distribution_table(df1_xdec, groupby='weighted_deciles',
                                  income_measure='expanded_income',
                                  result_type='weighted_sum')

    # copy every *_xdec column onto the column with the unsuffixed name;
    # only the trailing suffix is stripped from the column name
    suffix = '_xdec'
    df2_cols_with_suffix = [c for c in list(df2_xdec) if c.endswith(suffix)]
    for col in df2_cols_with_suffix:
        root_col_name = col[:-len(suffix)]
        df2_xdec[root_col_name] = df2_xdec[col]
    df2_xdec['expanded_income_baseline'] = df1_xdec['expanded_income']
    summ['dist2_xdec'] = \
        create_distribution_table(df2_xdec, groupby='weighted_deciles',
                                  income_measure='expanded_income_baseline',
                                  result_type='weighted_sum')

    # create distribution tables grouped by xbin
    dist1_xbin = \
        create_distribution_table(df1_xbin, groupby='standard_income_bins',
                                  income_measure='expanded_income',
                                  result_type='weighted_sum')
    summ['dist1_xbin'] = dist1_xbin

    # copy every *_xbin column onto the column with the unsuffixed name
    suffix = '_xbin'
    df2_cols_with_suffix = [c for c in list(df2_xbin) if c.endswith(suffix)]
    for col in df2_cols_with_suffix:
        root_col_name = col[:-len(suffix)]
        df2_xbin[root_col_name] = df2_xbin[col]
    df2_xbin['expanded_income_baseline'] = df1_xbin['expanded_income']
    dist2_xbin = \
        create_distribution_table(df2_xbin, groupby='standard_income_bins',
                                  income_measure='expanded_income_baseline',
                                  result_type='weighted_sum')
    summ['dist2_xbin'] = dist2_xbin

    # return dictionary of summary results
    return summ