def comparison(cname, calc, cmpdata, ofile):
    """
    Write comparison results for cname to ofile.

    The output is a tab-separated table with one row per AGI category
    followed by an 'ALL' row holding the column totals.  Each row shows
    the weighted T-C amount, the SOI amount, and the percent difference
    of the T-C amount from the SOI amount.

    Parameters
    ----------
    cname: string
        key into cmpdata; the text after the first ':' in cname is used
        as the table title, so cname must contain a ':'

    calc: object providing dataframe(), array() and array_len
        source of the weight (s006), AGI (c00100), and compare variables
        (presumably a Calculator object -- confirm against the callers)

    cmpdata: dictionary
        cmpdata[cname]['TC'] is an iterable of variable names whose
        arrays are summed to form the compare variable, and
        cmpdata[cname]['SOI'] is a sequence of amounts indexed by AGI
        table row in the order the rows are written

    ofile: writable text file object
        receives the table via its write() method

    Returns
    -------
    None
    """
    # pylint: disable=too-many-locals

    def pct_difference(txc_amt, soi_amt):
        """
        Return percent difference of txc_amt from soi_amt, or NaN when
        soi_amt is not positive (so the ratio is undefined or misleading).
        """
        if soi_amt > 0:
            return 100. * ((txc_amt / soi_amt) - 1.)
        return np.nan

    # generate compare table for cvarname
    vardf = calc.dataframe(['s006', 'c00100'])  # weight and AGI
    # add compare variable to vardf
    cvar = np.zeros(calc.array_len)
    for var in cmpdata[cname]['TC']:
        cvar += calc.array(var)
    vardf['cvar'] = cvar
    # construct AGI table
    vardf = add_income_table_row_variable(vardf, 'c00100', SOI_AGI_BINS)
    gbydf = vardf.groupby('table_row', as_index=False)
    # write AGI table with ALL row at bottom to ofile
    ofile.write('TABLE for {}\n'.format(cname.split(':')[1]))
    results = '{:23s}\t{:8.3f}\t{:8.3f}\t{:+6.1f}\n'
    colhead = '{:23s}\t{:>8s}\t{:>8s}\t{:>6s}\n'
    ofile.write(colhead.format('AGI category', 'T-C', 'SOI', '%diff'))
    soi_amounts = cmpdata[cname]['SOI']
    txc_tot = 0.
    soi_tot = 0.
    for idx, (grp_interval, grp) in enumerate(gbydf):
        # weighted sum of the compare variable, scaled by 1e-9
        # (presumably to express the amount in billions -- confirm)
        txc = (grp['cvar'] * grp['s006']).sum() * 1e-9
        soi = soi_amounts[idx]
        txc_tot += txc
        soi_tot += soi
        glabel = '[{:.8g}, {:.8g})'.format(grp_interval.left,
                                           grp_interval.right)
        ofile.write(results.format(glabel, txc, soi,
                                   pct_difference(txc, soi)))
    # the ALL row uses the same non-positive-SOI rule as the AGI rows, so
    # a zero SOI total writes nan instead of dividing by zero
    ofile.write(results.format('ALL', txc_tot, soi_tot,
                               pct_difference(txc_tot, soi_tot)))
def fuzzed(df1, df2, reform_affected, table_row_type):
    """
    Create fuzzed df2 dataframe and corresponding unfuzzed df1 dataframe.

    Fuzzing replaces the reform (df2) results with the baseline (df1)
    results for up to NUM_TO_FUZZ randomly chosen reform-affected filing
    units in each table row.  The random choice is made with
    np.random.choice, so it draws from NumPy's global random state.

    Parameters
    ----------
    df1: Pandas DataFrame
        contains results variables for the baseline policy, which are not
        changed by this function

    df2: Pandas DataFrame
        contains results variables for the reform policy, which are not
        changed by this function

    reform_affected: boolean numpy array (not changed by this function)
        True for filing units with a reform-induced combined tax difference;
        otherwise False

    table_row_type: string
        valid values are 'aggr', 'xbin', and 'xdec'

    Returns
    -------
    df1, df2: Pandas DataFrames
        where copied df2 is fuzzed to maintain data privacy and
        where copied df1 has same filing unit order as has the fuzzed df2

    Raises
    ------
    AssertionError
        if table_row_type is not a valid value, or if df1, df2, and
        reform_affected do not all have the same number of filing units
    """
    assert table_row_type in ('aggr', 'xbin', 'xdec')
    assert len(df1.index) == len(df2.index)
    assert reform_affected.size == len(df1.index)
    # work on copies so that the caller's objects are left untouched
    df1 = copy.deepcopy(df1)
    df2 = copy.deepcopy(df2)
    # add copy of reform_affected to df2
    df2['reform_affected'] = copy.deepcopy(reform_affected)
    # construct table rows, for which filing units in each row must be fuzzed;
    # df2 rows are assigned using the baseline (df1) expanded_income so that
    # both dataframes are grouped on the same income values
    if table_row_type == 'xbin':
        df1 = add_income_table_row_variable(df1, 'expanded_income',
                                            STANDARD_INCOME_BINS)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_income_table_row_variable(df2, 'expanded_income_baseline',
                                            STANDARD_INCOME_BINS)
        del df2['expanded_income_baseline']
    elif table_row_type == 'xdec':
        df1 = add_quantile_table_row_variable(df1, 'expanded_income',
                                              10, decile_details=True)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_quantile_table_row_variable(df2, 'expanded_income_baseline',
                                              10, decile_details=True)
        del df2['expanded_income_baseline']
    elif table_row_type == 'aggr':
        # a single table row containing every filing unit
        df1['table_row'] = np.ones(reform_affected.shape, dtype=int)
        df2['table_row'] = df1['table_row']
    # the groupby objects are created first and only then is the helper
    # table_row column removed from both dataframes
    gdf1 = df1.groupby('table_row', sort=False)
    gdf2 = df2.groupby('table_row', sort=False)
    del df1['table_row']
    del df2['table_row']
    # fuzz up to NUM_TO_FUZZ filing units randomly chosen in each group
    # (or table row), where fuzz means to replace the reform (2) results
    # with the baseline (1) results for each chosen filing unit;
    # the chained-assignment check is switched off only inside this context
    # manager, which restores the caller's previous setting on exit even
    # when an exception is raised
    with pd.option_context('mode.chained_assignment', None):
        group_list = []
        for name, group2 in gdf2:
            # positions (within this group) of the reform-affected units
            indices = np.where(group2['reform_affected'])
            num = min(len(indices[0]), NUM_TO_FUZZ)
            if num > 0:
                choices = np.random.choice(indices[0], size=num,
                                           replace=False)
                group1 = gdf1.get_group(name)
                for idx in choices:
                    group2.iloc[idx] = group1.iloc[idx]
            group_list.append(group2)
        df2 = pd.concat(group_list)
        del df2['reform_affected']
    # reinstate index order of df1 and df2 and return
    df1.sort_index(inplace=True)
    df2.sort_index(inplace=True)
    return (df1, df2)