Python listifyの例、microdf.listify Pythonの例

コード例 #1

0

ファイルを表示

def combine_base_reform(
    base: pd.DataFrame,
    reform: pd.DataFrame,
    base_cols: Optional[list] = None,
    cols: Optional[list] = None,
    reform_cols: Optional[list] = None,
) -> pd.DataFrame:
    """Combine base and reform with certain columns.

    The three column arguments default to None so callers only need to pass
    the selections they actually use.

    :param base: Base DataFrame. Index must match reform.
    :type base: pd.DataFrame
    :param reform: Reform DataFrame. Index must match base.
    :type reform: pd.DataFrame
    :param base_cols: Columns in base to keep. Defaults to None.
    :type base_cols: list, optional
    :param cols: Columns to keep from both base and reform. Defaults to None.
    :type cols: list, optional
    :param reform_cols: Columns in reform to keep. Defaults to None.
    :type reform_cols: list, optional
    :returns: DataFrame joining the selected base and reform columns on
        their index. Column names selected on both sides are suffixed with
        "_base" and "_reform" respectively.
    :rtype: pd.DataFrame

    """
    # Pair each side's own columns with the shared ones and hand the nested
    # list to mdf.listify, which yields the column selection for that side.
    all_base_cols = mdf.listify([base_cols, cols])
    all_reform_cols = mdf.listify([reform_cols, cols])
    return base[all_base_cols].join(
        reform[all_reform_cols], lsuffix="_base", rsuffix="_reform"
    )

コード例 #2

0

ファイルを表示

ファイル: agg.py プロジェクト: mgilbert1/microdf

def agg(base,
        reform,
        groupby,
        metrics,
        base_metrics=None,
        reform_metrics=None):
    """ Sums base and reform by group and adds percent changes.

    Args:
        base: Base DataFrame. Its index must match reform's.
        reform: Reform DataFrame. Its index must match base's.
        groupby: Variable in base that defines the groups.
        metrics: Variable(s) to aggregate and compute the % change of.
                 Each one is looked up through its weighted column, named
                 with an _m suffix, in both base and reform.
        base_metrics: Variables taken from base only and summed.
                      Defaults to None.
        reform_metrics: Variables taken from reform only and summed.
                        Defaults to None.

    Returns:
        DataFrame indexed by groupby holding the summed columns plus one
        <metric>_pctchg column per metric.
    """
    metric_names = mdf.listify(metrics)
    weighted_names = [name + '_m' for name in metric_names]
    # Build the three column selections up front, in the same order the
    # combined frame consumes them.
    from_base = mdf.listify([groupby, base_metrics])
    from_both = mdf.listify(weighted_names)
    from_reform = mdf.listify(reform_metrics)
    merged = combine_base_reform(base,
                                 reform,
                                 base_cols=from_base,
                                 cols=from_both,
                                 reform_cols=from_reform)
    totals = merged.groupby(groupby).sum()
    # Each percent-change column is computed from the grouped totals.
    for name in metric_names:
        totals[name + '_pctchg'] = pctchg_base_reform(totals, name)
    return totals

コード例 #3

0

ファイルを表示

def agg(
    base: pd.DataFrame,
    reform: pd.DataFrame,
    groupby: str,
    metrics: list,
    base_metrics: Optional[list] = None,
    reform_metrics: Optional[list] = None,
) -> pd.DataFrame:
    """Aggregates differences between base and reform.

    :param base: Base DataFrame. Index must match reform.
    :type base: pd.DataFrame
    :param reform: Reform DataFrame. Index must match base.
    :type reform: pd.DataFrame
    :param groupby: Variable in base to group on.
    :type groupby: str
    :param metrics: List of variables to agg and calculate the % change of.
        These should have associated weighted columns ending in _m in base
        and reform.
    :type metrics: list
    :param base_metrics: List of variables from base to sum. Defaults to
        None, meaning no extra base-only columns.
    :type base_metrics: Optional[list]
    :param reform_metrics: List of variables from reform to sum. Defaults to
        None, meaning no extra reform-only columns.
    :type reform_metrics: Optional[list]
    :returns: DataFrame with groupby and metrics, and _pctchg metrics.
    :rtype: pd.DataFrame

    """
    metrics = mdf.listify(metrics)
    # The weighted counterpart of every metric is taken from both frames.
    metrics_m = [i + "_m" for i in metrics]
    combined = combine_base_reform(
        base,
        reform,
        base_cols=mdf.listify([groupby, base_metrics]),
        cols=mdf.listify(metrics_m),
        reform_cols=mdf.listify(reform_metrics),
    )
    grouped = combined.groupby(groupby).sum()
    # Percent changes are derived from the grouped sums, one per metric.
    for metric in metrics:
        grouped[metric + "_pctchg"] = pctchg_base_reform(grouped, metric)
    return grouped

コード例 #4

0

ファイルを表示

ファイル: agg.py プロジェクト: mgilbert1/microdf

def combine_base_reform(base,
                        reform,
                        base_cols=None,
                        cols=None,
                        reform_cols=None):
    """ Join selected columns of base and reform into one DataFrame.

    Args:
        base: Base DataFrame. Its index must match reform's.
        reform: Reform DataFrame. Its index must match base's.
        base_cols: Columns to keep from base only.
        cols: Columns to keep from both base and reform.
        reform_cols: Columns to keep from reform only.

    Returns:
        DataFrame joining the two selections on their index; column names
            selected on both sides carry the "_base" and "_reform"
            suffixes.
    """
    # Each side keeps its own columns plus the shared ones.
    keep_from_base = mdf.listify([base_cols, cols])
    keep_from_reform = mdf.listify([reform_cols, cols])
    base_part = base[keep_from_base]
    reform_part = reform[keep_from_reform]
    return base_part.join(reform_part, lsuffix='_base', rsuffix='_reform')

コード例 #5

0

ファイルを表示

def add_weighted_metrics(df, metric_vars, w="s006", divisor=1e6, suffix="_m"):
    """Adds scaled, weighted versions of columns to a DataFrame in place.

    A scaled weight column named ``w + suffix`` is written first; each metric
    column is then multiplied by it and stored under ``metric + suffix``.

    :param df: A pandas DataFrame containing Tax-Calculator data.
    :param metric_vars: A list of column names to weight, or a single column
        name.
    :param w: Weight column. Defaults to s006.
    :param divisor: Number the weight is divided by before multiplying.
        Defaults to 1e6.
    :param suffix: Suffix appended to each new column name. Defaults to '_m'
        to match divisor default of 1e6.
    :returns: Nothing. Weighted columns are added in place.

    """
    scaled_weight_col = w + suffix
    df[scaled_weight_col] = df[w] / divisor
    for col in mdf.listify(metric_vars):
        # Read the scaled weight from df on every pass rather than caching
        # it, so the lookup always reflects the frame's current contents.
        df[col + suffix] = df[col] * df[scaled_weight_col]

コード例 #6

0

ファイルを表示

def ubi_or_bens(
    df,
    ben_cols,
    max_ubi="max_ubi",
    ubi="ubi",
    bens="bens",
    update_income_measures=None,
):
    """Calculates whether a tax unit will take UBI or benefits,
       and adjusts values accordingly.

    A unit takes the UBI when its maximum UBI strictly exceeds its total
    benefits; in that case its benefit columns are zeroed. Otherwise it
    keeps its benefits and receives no UBI.

    :param df: DataFrame. Modified in place.
    :param ben_cols: List of columns for benefits. A single column name is
            also accepted.
    :param max_ubi: Column name of the maximum UBI, before accounting
            for benefits. Defaults to 'max_ubi'.
    :param ubi: Column name to add representing the UBI. Defaults to 'ubi'.
    :param bens: Column name to add representing total benefits (after
            adjustment). Defaults to 'bens'.
    :param update_income_measures: List of income measures to update.
            Defaults to ['expanded_income', 'aftertax_income'].
    :returns: Nothing. Benefits in ben_cols are adjusted, the ubi and bens
        columns are added, and each income measure is increased by the net
        difference (UBI plus adjusted benefits minus original benefits).

    """
    if update_income_measures is None:
        update_income_measures = ["expanded_income", "aftertax_income"]
    # Prep list args.
    update_income_measures = mdf.listify(update_income_measures)
    if isinstance(ben_cols, str):
        # A bare column name would otherwise select a Series and be iterated
        # character by character below.
        ben_cols = [ben_cols]
    total_bens = df[ben_cols].sum(axis=1)
    take_ubi = df[max_ubi] > total_bens
    df[ubi] = np.where(take_ubi, df[max_ubi], 0)
    # 0 where the unit takes the UBI (benefits forfeited), 1 otherwise.
    keep_bens = np.where(take_ubi, 0, 1)
    for ben in ben_cols:
        df[ben] *= keep_bens
    df[bens] = df[ben_cols].sum(axis=1)
    # Update expanded and aftertax income. Index by the configured column
    # names so that non-default `ubi` / `bens` names are honored.
    diff = df[ubi] + df[bens] - total_bens
    for i in update_income_measures:
        df[i] += diff

コード例 #7

0

ファイルを表示

def calc_df(
    records=None,
    policy=None,
    year=2020,
    reform=None,
    group_vars=None,
    metric_vars=None,
    group_n65=False,
):
    """Creates a pandas DataFrame for given Tax-Calculator data.

    s006 is always included, and RECID is used as an index.

    :param records: An optional Records object. If not provided, uses CPS
        records. (Default value = None)
    :param policy: An optional Policy object. If not provided, uses default
            Policy.
    :param year: An optional year to advance to. If not provided, defaults to
            2020.
    :param reform: An optional reform to implement for the Policy object.
        (Default value = None)
    :param group_vars: An optional list of column names (or a single column
            name) to include in the DataFrame. (Default value = None)
    :param metric_vars: An optional list of column names to include and
        calculate weighted sums of (in millions named as *_m) in the DataFrame.
        (Default value = None)
    :param group_n65: Whether to calculate and group by n65. Defaults to False.
        May be used with or without group_vars.
    :returns: A pandas DataFrame. market_income is also always calculated.

    """
    tc = import_optional_dependency("taxcalc")
    # Assign defaults.
    if records is None:
        records = tc.Records.cps_constructor()
    if policy is None:
        policy = tc.Policy()
    if reform is not None:
        policy.implement_reform(reform)
    # Calculate.
    calc = tc.Calculator(records=records, policy=policy, verbose=False)
    calc.advance_to_year(year)
    calc.calc_all()
    if group_n65:
        # n65 is derived from the age columns, so request them as well.
        # group_vars defaults to None, so normalize it to a list first;
        # concatenating None with a list would raise TypeError.
        if group_vars is None:
            group_vars = []
        elif isinstance(group_vars, str):
            group_vars = [group_vars]
        group_vars = list(group_vars) + [
            "age_head",
            "age_spouse",
            "elderly_dependents",
        ]
    # Include expanded_income and benefits to produce market_income.
    # mdf.listify turns the nested selection into the list of columns.
    all_cols = mdf.listify(
        [
            "RECID",
            "s006",
            "expanded_income",
            "aftertax_income",
            mdf.BENS,
            group_vars,
            metric_vars,
        ]
    )
    df = calc.dataframe(all_cols)
    # Create core elements.
    df["market_income"] = mdf.market_income(df)
    df["bens"] = df[mdf.BENS].sum(axis=1)
    df["tax"] = df.expanded_income - df.aftertax_income
    if group_n65:
        df["n65"] = n65(df.age_head, df.age_spouse, df.elderly_dependents)
        # The age columns were only needed to derive n65.
        df.drop(
            ["age_head", "age_spouse", "elderly_dependents"],
            axis=1,
            inplace=True,
        )
    # Add calculated columns for metrics.
    mdf.add_weighted_metrics(df, metric_vars)
    # Set RECID to int and set it as index before returning.
    df["RECID"] = df.RECID.map(int)
    return df.set_index("RECID")

コード例 #8

0

ファイルを表示

def calc_df(records=None,
            policy=None,
            year=2019,
            reform=None,
            group_vars=None,
            metric_vars=None,
            group_n65=False):
    """Creates a pandas DataFrame for given Tax-Calculator data.

    s006 is always included, and RECID is used as an index.

    Args:
        records: An optional Records object. If not provided, uses CPS records.
        policy: An optional Policy object. If not provided, uses default
            Policy.
        year: An optional year to advance to. If not provided, defaults to
            2019.
        reform: An optional reform to implement for the Policy object.
        group_vars: An optional list of column names (or a single column
            name) to include in the DataFrame.
        metric_vars: An optional list of column names to include and calculate
             weighted sums of (in millions named as *_m) in the DataFrame.
        group_n65: Whether to calculate and group by n65. Defaults to False.
            May be used with or without group_vars.

    Returns:
        A pandas DataFrame. market_income is also always calculated.
    """
    tc = import_optional_dependency("taxcalc")
    # Assign defaults.
    if records is None:
        records = tc.Records.cps_constructor()
    if policy is None:
        policy = tc.Policy()
    if reform is not None:
        policy.implement_reform(reform)
    # Calculate.
    calc = tc.Calculator(records=records, policy=policy, verbose=False)
    calc.advance_to_year(year)
    calc.calc_all()
    # TODO: Make n65, ECI, etc. part of the list of columns you can request.
    if group_n65:
        # n65 is derived from the age columns, so request them as well.
        # group_vars defaults to None, so normalize it to a list first;
        # concatenating None with a list would raise TypeError.
        if group_vars is None:
            group_vars = []
        elif isinstance(group_vars, str):
            group_vars = [group_vars]
        group_vars = list(group_vars) + ['age_head', 'age_spouse',
                                         'elderly_dependents']
    # Include expanded_income and benefits to produce market_income.
    # mdf.listify turns the nested selection into the list of columns.
    all_cols = mdf.listify(
        ['RECID', 's006', 'expanded_income', 'aftertax_income',
         mdf.BENS, group_vars, metric_vars])
    df = calc.dataframe(all_cols)
    # Create core elements.
    df['market_income'] = mdf.market_income(df)
    df['bens'] = df[mdf.BENS].sum(axis=1)
    df['tax'] = df.expanded_income - df.aftertax_income
    if group_n65:
        df['n65'] = n65(df.age_head, df.age_spouse, df.elderly_dependents)
        # The age columns were only needed to derive n65.
        df.drop(['age_head', 'age_spouse', 'elderly_dependents'], axis=1,
                inplace=True)
    # Add calculated columns for metrics.
    mdf.add_weighted_metrics(df, metric_vars)
    # Set RECID to int and set it as index before returning.
    df['RECID'] = df.RECID.map(int)
    return df.set_index('RECID')