Beispiel #1
0
def test_weighted_quantile():
    """Exercise mdf.weighted_quantile with and without weights.

    The unweighted quantiles of X at 0, 0.5 and 1 must equal [1, 2, 5].
    For the weighted call only a loose property is asserted: the weighted
    median is strictly below the unweighted median.
    """
    quantiles = [0, 0.5, 1]
    expected_unweighted = [1, 2, 5]

    unweighted = mdf.weighted_quantile(X, quantiles).tolist()
    assert unweighted == expected_unweighted

    weighted = mdf.weighted_quantile(X, quantiles, W).tolist()
    # The weighted result is not exactly what stacking (repeating) values by
    # weight would give, so the stacked expectation [1, 1, 5] is not asserted.
    # See stackoverflow.com/q/21844024#comment102342137_29677616
    # Until a better test exists, only compare the two medians (position 1).
    assert weighted[1] < unweighted[1]
Beispiel #2
0
def top_x_pct_share(df, col, top_x_pct, w=None):
    """Return the weighted share of a column's total held by the top x%.

    Rows whose ``col`` value is at or above the weighted
    ``1 - top_x_pct`` quantile form the top group; the result is that
    group's weighted sum divided by the weighted sum over all rows.

    :param df: DataFrame.
    :param col: Name of the column in df holding the value.
    :param top_x_pct: Decimal between 0 and 1 giving the size of the top
        group, e.g. 0.1 or 0.001.
    :param w: Name of the weight column in df. Passed through unchanged to
        the mdf helpers.
    :returns: The share of w-weighted ``col`` held by the top x%.

    """
    cutoff = mdf.weighted_quantile(df, col, w, 1 - top_x_pct)
    # Boolean mask selecting rows that belong to the top group.
    in_top = df[col] >= cutoff
    top_total = mdf.weighted_sum(df[in_top], col, w)
    overall_total = mdf.weighted_sum(df, col, w)
    return top_total / overall_total
Beispiel #3
0
def top_x_pct_share(val, top_x_pct, w=None):
    """Return the weighted share of the total held by the top x%.

    Args:
        val: Values (list-like).
        top_x_pct: Decimal between 0 and 1 giving the size of the top
            group, e.g. 0.1 or 0.001.
        w: Weights (list-like, same length as val). When omitted, every
            value gets a weight of 1.

    Returns:
        The sum of val * w over entries at or above the weighted
        (1 - top_x_pct) quantile, divided by the sum of val * w over
        all entries.
    """
    val = pd.Series(val)
    # Fall back to unit weights so the arithmetic below is uniform.
    w = pd.Series(np.ones(val.size) if w is None else w)
    cutoff = mdf.weighted_quantile(val, 1 - top_x_pct, w)
    in_top = val >= cutoff
    weighted_top = (val[in_top] * w[in_top]).sum()
    weighted_all = (val * w).sum()
    return weighted_top / weighted_all
Beispiel #4
0
def total_wealth_by_decile(data, measure):
    """Compute each decile's share of a weighted total, per ``race2`` group.

    For every distinct value of ``data.race2`` the rows of that group are
    split at the group's weighted decile boundaries of ``measure`` (weights
    taken from the ``wgt`` column). Each decile's weighted sum of
    ``measure`` is then expressed as a percentage of the group's weighted
    total.

    Args:
        data: DataFrame with a ``race2`` column, a ``wgt`` weight column and
            the column named by ``measure``.
        measure: Name of the column to rank by and to total.

    Returns:
        DataFrame indexed 1..10 (decile number) with one column per
        ``race2`` value holding that decile's percentage of the group's
        weighted total. An empty DataFrame when ``data.race2`` has no
        values.
    """
    # Loop-invariant: 11 boundaries (0.0, 0.1, ..., 1.0) delimit 10 deciles.
    decile_bounds = np.arange(0, 1.1, 0.1)
    decile_index = np.arange(1, 11, 1)

    race_frames = []
    for race2 in data.race2.unique():
        race_df = data[data.race2 == race2].copy(deep=True)
        deciles = np.asarray(
            mdf.weighted_quantile(race_df, measure, "wgt", decile_bounds))
        race_total_nw = mdf.weighted_sum(race_df, measure, "wgt")

        # Filter on the requested measure; the previous code always filtered
        # on the ``networth`` column regardless of ``measure``.
        # NOTE(review): Series.between is inclusive at both ends, so a row
        # lying exactly on a decile boundary is counted in both neighbouring
        # deciles -- confirm this is intended.
        quantile_nws = [
            mdf.weighted_sum(
                race_df[race_df[measure].between(lower, upper)],
                measure, "wgt")
            for lower, upper in zip(deciles[:-1], deciles[1:])
        ]

        # Convert explicitly so the division is element-wise even when the
        # total is a plain Python number.
        quantile_nw_pct = (np.asarray(quantile_nws) / race_total_nw) * 100
        race_frames.append(
            pd.DataFrame({race2: quantile_nw_pct}, index=decile_index))

    if not race_frames:
        return pd.DataFrame()
    # Join all groups side by side in one pass instead of growing a frame.
    return pd.concat(race_frames, axis=1)
Beispiel #5
0
def test_weighted_quantile():
    """Smoke-test mdf.weighted_quantile on df with column "x" weighted by "w".

    No value is asserted; the test only checks that the call and the
    subsequent ``tolist()`` conversion complete without raising.
    """
    quantiles = [0, 0.5, 1]
    result = mdf.weighted_quantile(df, "x", "w", quantiles)
    result.tolist()
Beispiel #6
0
 def _top_x_pct_share(df, col, top_x_pct, w=None):
     """Return the weighted share of ``col`` held by the top x% of rows.

     Rows with ``col`` at or above the weighted ``1 - top_x_pct`` quantile
     make up the top group; their weighted sum is divided by the weighted
     sum over all of ``df``. ``w`` is handed unchanged to the mdf helpers.
     """
     cutoff = mdf.weighted_quantile(df, col, w, 1 - top_x_pct)
     # Boolean mask selecting rows that belong to the top group.
     in_top = df[col] >= cutoff
     top_total = mdf.weighted_sum(df[in_top], col, w)
     overall_total = mdf.weighted_sum(df, col, w)
     return top_total / overall_total