def test_add_quantile_bins():
    """
    Check add_quantile_bins on DATA with and without income weighting.
    """
    columns = ['expanded_income', 's006', 'label']
    frame = pd.DataFrame(data=DATA, columns=columns)
    # unweighted split into 100 quantiles: every label must lie in 1..100
    binned = add_quantile_bins(frame, 'expanded_income', 100,
                               weight_by_income_measure=False)
    allowed = set(range(1, 101))
    assert all(label in allowed for label in binned['bins'].unique())
    # weighting by the income measure only has to produce a 'bins' column
    binned = add_quantile_bins(frame, 'expanded_income', 100,
                               weight_by_income_measure=True)
    assert 'bins' in binned
def fuzz(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz):
    """
    Add fuzzed versions of the cols_to_fuzz columns to df2.

    df2 is binned on imeasure according to bin_type:
      'dec' uses add_quantile_bins with 10 bins,
      'bin' uses add_income_bins with WEBAPP_INCOME_BINS, and
      'agg' uses add_quantile_bins with a single bin.
    Within each bin, chooser is applied to the 'mask' column to build a
    'nofuzz' column.  For each col in cols_to_fuzz, a col+suffix column
    is written that equals the df2 value where nofuzz is one and the
    df1 (pre-reform) value where nofuzz is zero.

    Any other bin_type value fails the assertion.  Nothing is returned.
    NOTE(review): the new columns reach the caller only if the binning
    helpers hand back the same df2 object -- confirm.
    """
    # pylint: disable=too-many-arguments
    assert bin_type in ('dec', 'bin', 'agg')
    if bin_type == 'bin':
        df2 = add_income_bins(df2, imeasure, bins=WEBAPP_INCOME_BINS)
    else:
        # deciles for 'dec'; one all-inclusive bin for 'agg'
        num_bins = 10 if bin_type == 'dec' else 1
        df2 = add_quantile_bins(df2, imeasure, num_bins)
    by_bin = df2.groupby('bins')
    df2['nofuzz'] = by_bin['mask'].transform(chooser)
    for col in cols_to_fuzz:
        kept_post = df2[col] * df2['nofuzz']
        kept_pre = df1[col] * df2['nofuzz']
        # same left-to-right arithmetic as post*n - pre*n + pre
        df2[col + suffix] = kept_post - kept_pre + df1[col]
def test_add_quantile_bins():
    """
    Check add_quantile_bins on DATA with default and with custom labels.
    """
    columns = ['expanded_income', 's006', 'label']
    frame = pd.DataFrame(data=DATA, columns=columns)
    # unweighted split into 100 quantiles: every label must lie in 1..100
    binned = add_quantile_bins(frame, 'expanded_income', 100,
                               weight_by_income_measure=False)
    allowed = set(range(1, 101))
    assert all(label in allowed for label in binned['bins'].unique())
    # weighting by the income measure only has to produce a 'bins' column
    binned = add_quantile_bins(frame, 'expanded_income', 100,
                               weight_by_income_measure=True)
    assert 'bins' in binned
    # custom labels: ten bins, each labelled with one of the given letters
    custom_labels = list('abcdefghij')
    binned = add_quantile_bins(frame, 'expanded_income', 10,
                               labels=custom_labels)
    assert 'bins' in binned
    assert all(label in custom_labels
               for label in binned['bins'].unique())
def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
    """
    Add col+suffix columns to df2 for each col in cols_to_fuzz.

    df2 is binned on imeasure according to bin_type:
      'dec' uses add_quantile_bins with 10 bins,
      'bin' uses add_income_bins with STANDARD_INCOME_BINS, and
      'agg' uses add_quantile_bins with a single bin.
    When do_fuzzing is true, chooser is applied to the 'mask' column
    within each bin to build the 'nofuzz' column; otherwise 'nofuzz' is
    all ones, so every new column simply carries the df2 value.  Where
    nofuzz is zero the new column takes the df1 (pre-reform) value.

    Any other bin_type value fails the assertion.  Nothing is returned.
    NOTE(review): the new columns reach the caller only if the binning
    helpers hand back the same df2 object -- confirm.
    """
    # pylint: disable=too-many-arguments
    assert bin_type in ('dec', 'bin', 'agg')
    if bin_type == 'bin':
        df2 = add_income_bins(df2, imeasure, bins=STANDARD_INCOME_BINS)
    else:
        # deciles for 'dec'; one all-inclusive bin for 'agg'
        num_bins = 10 if bin_type == 'dec' else 1
        df2 = add_quantile_bins(df2, imeasure, num_bins)
    # grouping happens before the branch so a missing 'bins' column
    # is reported whether or not fuzzing is requested
    by_bin = df2.groupby('bins')
    if not do_fuzzing:
        # never do any results fuzzing
        df2['nofuzz'] = np.ones(df2.shape[0], dtype=np.int8)
    else:
        df2['nofuzz'] = by_bin['mask'].transform(chooser)
    for col in cols_to_fuzz:
        kept_post = df2[col] * df2['nofuzz']
        kept_pre = df1[col] * df2['nofuzz']
        # same left-to-right arithmetic as post*n - pre*n + pre
        df2[col + suffix] = kept_post - kept_pre + df1[col]
def write_decile_table(dfx, tfile, tkind='Totals'):
    """
    Write a ten-row expanded-income decile table, plus an 'A' totals
    row, to tfile.

    dfx is split into 10 bins on 'expanded_income' by add_quantile_bins
    (with weight_by_income_measure=False).  For each bin the table shows
    unweighted_sum of 's006' scaled by 1e-6 and weighted_sum of
    'expanded_income', 'iitax', 'payrolltax', 'lumpsum_tax' and
    'combined', each scaled by 1e-9.  tkind is inserted into the title
    line.  tfile only needs a write method.
    """
    binned = add_quantile_bins(dfx, 'expanded_income', 10,
                               weight_by_income_measure=False)
    groups = binned.groupby('bins', as_index=False)
    returns = groups.apply(unweighted_sum, 's006')
    dollar_columns = ('expanded_income', 'iitax', 'payrolltax',
                      'lumpsum_tax', 'combined')
    dollars = [groups.apply(weighted_sum, col) for col in dollar_columns]

    # write decile table to text file
    title = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
    tfile.write(title.format(tkind))
    # NOTE(review): header cells are narrower than the numeric column
    # widths used below -- confirm the intended alignment.
    header_rows = (
        (' Returns', ' ExpInc', ' IncTax', ' PayTax', ' LSTax', ' AllTax'),
        (' (#m)', ' ($b)', ' ($b)', ' ($b)', ' ($b)', ' ($b)'),
    )
    for cells in header_rows:
        tfile.write('{}{}{}{}{}{}\n'.format(*cells))

    numfmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'

    def table_row(label, pick):
        """
        Return label followed by the scaled value pick() extracts from
        each series.
        """
        amounts = [pick(series) * 1e-9 for series in dollars]
        return label + numfmt.format(pick(returns) * 1e-6, *amounts)

    for decile in range(10):
        tfile.write(table_row('{:2d}'.format(decile),
                              lambda series, idx=decile: series[idx]))
    # all-decile totals
    tfile.write(table_row(' A', lambda series: series.sum()))