def comparison(cname, calc, cmpdata, ofile):
    """
    Write comparison results for cname to ofile.

    Parameters
    ----------
    cname: string
        key into cmpdata; its second ':'-separated field is written as
        the table title

    calc: object
        provides dataframe(varlist), array(varname) and array_len

    cmpdata: dictionary
        cmpdata[cname]['TC'] is iterated for the variable names whose
        calc.array values are summed into the compared amount;
        cmpdata[cname]['SOI'] is indexed by the position of each AGI
        group in iteration order

    ofile: object with a write method
        receives the tab-delimited table, one line per AGI group plus a
        final ALL line

    Returns
    -------
    nothing: results are written to ofile
    """
    # pylint: disable=too-many-locals
    def pct_diff_of(txc_amt, soi_amt):
        """
        Return percentage difference of txc_amt relative to soi_amt,
        or NaN when soi_amt is not positive.
        """
        if soi_amt > 0:
            return 100. * ((txc_amt / soi_amt) - 1.)
        return np.nan

    # generate compare table for cvarname
    vardf = calc.dataframe(['s006', 'c00100'])  # weight and AGI
    # add compare variable to vardf
    cvar = np.zeros(calc.array_len)
    spec = cmpdata[cname]
    for var in spec['TC']:
        cvar += calc.array(var)
    vardf['cvar'] = cvar
    # construct AGI table
    vardf = add_income_table_row_variable(vardf, 'c00100', SOI_AGI_BINS)
    gbydf = vardf.groupby('table_row', as_index=False)
    # write AGI table with ALL row at bottom to ofile
    ofile.write('TABLE for {}\n'.format(cname.split(':')[1]))
    results = '{:23s}\t{:8.3f}\t{:8.3f}\t{:+6.1f}\n'
    colhead = '{:23s}\t{:>8s}\t{:>8s}\t{:>6s}\n'
    ofile.write(colhead.format('AGI category', 'T-C', 'SOI', '%diff'))
    txc_tot = 0.
    soi_tot = 0.
    for idx, (grp_interval, grp) in enumerate(gbydf):
        # weighted sum of the compare variable, scaled by 1e-9
        txc = (grp['cvar'] * grp['s006']).sum() * 1e-9
        soi = spec['SOI'][idx]
        txc_tot += txc
        soi_tot += soi
        glabel = '[{:.8g}, {:.8g})'.format(grp_interval.left,
                                           grp_interval.right)
        ofile.write(results.format(glabel, txc, soi, pct_diff_of(txc, soi)))
    # the ALL row uses the same non-positive-SOI guard as each group row,
    # so a zero SOI total yields NaN instead of a division error
    ofile.write(results.format('ALL', txc_tot, soi_tot,
                               pct_diff_of(txc_tot, soi_tot)))
def comparison(cname, calc, cmpdata, ofile):
    """
    Write comparison results for cname to ofile.

    The table has one line per AGI group produced by grouping on the
    'table_row' column, followed by an ALL line holding the totals.
    Each line shows the weighted amount computed from calc (T-C), the
    corresponding cmpdata[cname]['SOI'] amount, and the percentage
    difference between them (NaN when the SOI amount is not positive).

    Parameters
    ----------
    cname: string
        key into cmpdata; cname.split(':')[1] is used as the table title

    calc: object
        must supply dataframe(varlist), array(varname) and array_len

    cmpdata: dictionary
        cmpdata[cname]['TC'] gives the variable names to sum and
        cmpdata[cname]['SOI'] gives the amounts, indexed by group position

    ofile: object with a write method

    Returns
    -------
    nothing: results are written to ofile
    """
    # pylint: disable=too-many-locals
    # generate compare table for cvarname
    vardf = calc.dataframe(['s006', 'c00100'])  # weight and AGI
    # add compare variable to vardf
    cvar = np.zeros(calc.array_len)
    for var in cmpdata[cname]['TC']:
        cvar += calc.array(var)
    vardf['cvar'] = cvar
    # construct AGI table
    vardf = add_income_table_row_variable(vardf, 'c00100', SOI_AGI_BINS)
    gbydf = vardf.groupby('table_row', as_index=False)
    # write AGI table with ALL row at bottom to ofile
    ofile.write('TABLE for {}\n'.format(cname.split(':')[1]))
    results = '{:23s}\t{:8.3f}\t{:8.3f}\t{:+6.1f}\n'
    colhead = '{:23s}\t{:>8s}\t{:>8s}\t{:>6s}\n'
    ofile.write(colhead.format('AGI category', 'T-C', 'SOI', '%diff'))
    txc_tot = 0.
    soi_tot = 0.
    for idx, (grp_interval, grp) in enumerate(gbydf):
        txc = (grp['cvar'] * grp['s006']).sum() * 1e-9
        soi = cmpdata[cname]['SOI'][idx]
        txc_tot += txc
        soi_tot += soi
        pct_diff = 100. * ((txc / soi) - 1.) if soi > 0 else np.nan
        glabel = '[{:.8g}, {:.8g})'.format(grp_interval.left,
                                           grp_interval.right)
        ofile.write(results.format(glabel, txc, soi, pct_diff))
    # guard the ALL row exactly like the group rows: without this check a
    # zero SOI total makes the division below fail or produce inf
    if soi_tot > 0:
        pct_diff = 100. * ((txc_tot / soi_tot) - 1.)
    else:
        pct_diff = np.nan
    ofile.write(results.format('ALL', txc_tot, soi_tot, pct_diff))
Exemple #3
0
def fuzzed(df1, df2, reform_affected, table_row_type):
    """
    Create fuzzed df2 dataframe and corresponding unfuzzed df1 dataframe.

    Parameters
    ----------
    df1: Pandas DataFrame
        contains results variables for the baseline policy, which are not
        changed by this function

    df2: Pandas DataFrame
        contains results variables for the reform policy, which are not
        changed by this function

    reform_affected: boolean numpy array (not changed by this function)
        True for filing units with a reform-induced combined tax difference;
        otherwise False

    table_row_type: string
        valid values are 'aggr', 'xbin', and 'xdec'

    Returns
    -------
    df1, df2: Pandas DataFrames
        where copied df2 is fuzzed to maintain data privacy and
        where copied df1 has same filing unit order as has the fuzzed df2

    Notes
    -----
    Filing units to fuzz are drawn with np.random.choice, so results
    depend on the state of numpy's global random number generator.

    The pandas 'mode.chained_assignment' option is set to None only while
    the groups are being fuzzed and is then restored to the value it had
    on entry, even if an exception is raised.
    """
    assert table_row_type in ('aggr', 'xbin', 'xdec')
    assert len(df1.index) == len(df2.index)
    assert reform_affected.size == len(df1.index)
    df1 = copy.deepcopy(df1)
    df2 = copy.deepcopy(df2)
    # add copy of reform_affected to df2
    df2['reform_affected'] = copy.deepcopy(reform_affected)
    # construct table rows, for which filing units in each row must be fuzzed
    if table_row_type == 'xbin':
        df1 = add_income_table_row_variable(df1, 'expanded_income',
                                            STANDARD_INCOME_BINS)
        # df2 rows are assigned using baseline income so that both
        # dataframes are split into the same groups
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_income_table_row_variable(df2, 'expanded_income_baseline',
                                            STANDARD_INCOME_BINS)
        del df2['expanded_income_baseline']
    elif table_row_type == 'xdec':
        df1 = add_quantile_table_row_variable(df1, 'expanded_income',
                                              10, decile_details=True)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_quantile_table_row_variable(df2, 'expanded_income_baseline',
                                              10, decile_details=True)
        del df2['expanded_income_baseline']
    elif table_row_type == 'aggr':
        # a single table row containing every filing unit
        df1['table_row'] = np.ones(reform_affected.shape, dtype=int)
        df2['table_row'] = df1['table_row']
    gdf1 = df1.groupby('table_row', sort=False)
    gdf2 = df2.groupby('table_row', sort=False)
    del df1['table_row']
    del df2['table_row']
    # fuzz up to NUM_TO_FUZZ filing units randomly chosen in each group
    # (or table row), where fuzz means to replace the reform (2) results
    # with the baseline (1) results for each chosen filing unit
    group_list = []
    # option_context restores the caller's chained_assignment setting on
    # exit instead of unconditionally forcing it to 'warn'
    with pd.option_context('mode.chained_assignment', None):
        for name, group2 in gdf2:
            # positions (within the group) of reform-affected filing units
            indices = np.where(group2['reform_affected'])
            num = min(len(indices[0]), NUM_TO_FUZZ)
            if num > 0:
                choices = np.random.choice(indices[0], size=num,
                                           replace=False)
                group1 = gdf1.get_group(name)
                for idx in choices:
                    group2.iloc[idx] = group1.iloc[idx]
            group_list.append(group2)
        df2 = pd.concat(group_list)
        del df2['reform_affected']
    # reinstate index order of df1 and df2 and return
    df1.sort_index(inplace=True)
    df2.sort_index(inplace=True)
    return (df1, df2)
Exemple #4
0
def fuzzed(df1, df2, reform_affected, table_row_type):
    """
    Create fuzzed df2 dataframe and corresponding unfuzzed df1 dataframe.

    Fuzzing replaces the reform results of up to NUM_TO_FUZZ randomly
    chosen reform-affected filing units in each table row with the
    baseline results for the same filing units.

    Parameters
    ----------
    df1: Pandas DataFrame
        contains results variables for the baseline policy, which are not
        changed by this function

    df2: Pandas DataFrame
        contains results variables for the reform policy, which are not
        changed by this function

    reform_affected: boolean numpy array (not changed by this function)
        True for filing units with a reform-induced combined tax difference;
        otherwise False

    table_row_type: string
        valid values are 'aggr', 'xbin', and 'xdec'

    Returns
    -------
    df1, df2: Pandas DataFrames
        where copied df2 is fuzzed to maintain data privacy and
        where copied df1 has same filing unit order as has the fuzzed df2

    Notes
    -----
    Random choices come from np.random.choice (numpy's global generator).
    The pandas 'mode.chained_assignment' option is suppressed only for
    the duration of the fuzzing step and its previous value is restored
    afterwards, including when an exception propagates.
    """
    assert table_row_type in ('aggr', 'xbin', 'xdec')
    assert len(df1.index) == len(df2.index)
    assert reform_affected.size == len(df1.index)
    df1 = copy.deepcopy(df1)
    df2 = copy.deepcopy(df2)
    # add copy of reform_affected to df2
    df2['reform_affected'] = copy.deepcopy(reform_affected)
    # construct table rows, for which filing units in each row must be fuzzed
    if table_row_type == 'xbin':
        df1 = add_income_table_row_variable(df1, 'expanded_income',
                                            STANDARD_INCOME_BINS)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_income_table_row_variable(df2, 'expanded_income_baseline',
                                            STANDARD_INCOME_BINS)
        del df2['expanded_income_baseline']
    elif table_row_type == 'xdec':
        df1 = add_quantile_table_row_variable(df1, 'expanded_income',
                                              10, decile_details=True)
        df2['expanded_income_baseline'] = df1['expanded_income']
        df2 = add_quantile_table_row_variable(df2, 'expanded_income_baseline',
                                              10, decile_details=True)
        del df2['expanded_income_baseline']
    elif table_row_type == 'aggr':
        df1['table_row'] = np.ones(reform_affected.shape, dtype=int)
        df2['table_row'] = df1['table_row']
    gdf1 = df1.groupby('table_row', sort=False)
    gdf2 = df2.groupby('table_row', sort=False)
    del df1['table_row']
    del df2['table_row']
    # fuzz the reform groups with chained-assignment warnings silenced;
    # the context manager puts the option back to its prior value rather
    # than overwriting it with 'warn'
    fuzzed_groups = []
    with pd.option_context('mode.chained_assignment', None):
        for row_name, reform_grp in gdf2:
            affected_pos = np.where(reform_grp['reform_affected'])[0]
            num_fuzz = min(len(affected_pos), NUM_TO_FUZZ)
            if num_fuzz > 0:
                picked = np.random.choice(affected_pos, size=num_fuzz,
                                          replace=False)
                base_grp = gdf1.get_group(row_name)
                for pos in picked:
                    # overwrite the reform row with its baseline row
                    reform_grp.iloc[pos] = base_grp.iloc[pos]
            fuzzed_groups.append(reform_grp)
        df2 = pd.concat(fuzzed_groups)
        del df2['reform_affected']
    # reinstate index order of df1 and df2 and return
    df1.sort_index(inplace=True)
    df2.sort_index(inplace=True)
    return (df1, df2)