Esempio n. 1
0
def test_subset():
    """Check that column subsetting keeps weights and equals() sees them."""
    full = mdf.MicroDataFrame(
        {"x": [1, 2], "y": [3, 4], "z": [5, 6]},
        weights=[7, 8],
    )
    expected = mdf.MicroDataFrame(
        {"x": [1, 2], "y": [3, 4]},
        weights=[7, 8],
    )
    # Dropping "z" must give a frame equal to one built without it.
    assert full[["x", "y"]].equals(expected)
    # Same columns but shifted weights must not compare equal.
    reweighted = expected.copy()
    reweighted.weights += 1
    assert not full[["x", "y"]].equals(reweighted)
Esempio n. 2
0
def test_df_init():
    """Check three ways of attaching weights give the same weighted mean."""
    values = np.array([0, 1, 1])
    weights = np.array([3, 0, 9])
    expected = np.average(values, weights=weights)

    # 1. Weights passed straight to the constructor.
    from_ctor = mdf.MicroDataFrame({"a": values}, weights=weights)
    assert from_ctor.a.mean() == expected

    # 2. Empty frame, column added, then weights attached via set_weights.
    from_setter = mdf.MicroDataFrame()
    from_setter["a"] = values
    from_setter.set_weights(weights)
    assert from_setter.a.mean() == expected

    # 3. Weights stored as a column and selected via set_weight_col.
    from_column = mdf.MicroDataFrame()
    from_column["a"] = values
    from_column["w"] = weights
    from_column.set_weight_col("w")
    assert from_column.a.mean() == expected
Esempio n. 3
0
def test_concat():
    """Check mdf.concat returns a weighted MicroDataFrame on both axes."""
    left = mdf.MicroDataFrame({"x": [1, 2]}, weights=[3, 4])
    right = mdf.MicroDataFrame({"y": [5, 6]}, weights=[7, 8])

    # Verify that pd.concat returns DataFrame (probably no way to fix this).
    plain = pd.concat([left, right])
    assert isinstance(plain, pd.DataFrame)
    assert not isinstance(plain, mdf.MicroDataFrame)

    # Vertical concatenation: type is kept and weights are stacked.
    stacked = mdf.concat([left, right])
    assert isinstance(stacked, mdf.MicroDataFrame)
    expected_weights = pd.concat([left.weights, right.weights])
    assert stacked.weights.equals(expected_weights)

    # Horizontal concatenation: type is kept, first frame's weights are used.
    side_by_side = mdf.concat([left, right], axis=1)
    assert isinstance(side_by_side, mdf.MicroDataFrame)
    assert side_by_side.weights.equals(left.weights)
Esempio n. 4
0
def test_copy_equals():
    """Check copy() yields an equal object and equals() compares weights."""
    original = mdf.MicroDataFrame(
        {"x": [1, 2], "y": [3, 4], "z": [5, 6]},
        weights=[7, 8],
    )
    clone = original.copy()
    doubled = clone.copy()
    doubled.weights *= 2

    # Frame level: the plain copy matches, the re-weighted copy does not.
    assert original.equals(clone)
    assert not original.equals(doubled)

    # Column level: the same must hold for a single column.
    assert original.x.equals(clone.x)
    assert not original.x.equals(doubled.x)
Esempio n. 5
0
def concat(*args, **kwargs):
    """Concatenate MicroDataFrame objects, preserving weights.

    If concatenating horizontally, the first set of weights are used.
    All args and kwargs are passed to pd.concat.

    :param args: Positional arguments forwarded unchanged to pd.concat.
    :param kwargs: Keyword arguments forwarded unchanged to pd.concat.
    :return: MicroDataFrame with concatenated weights.
    :rtype: mdf.MicroDataFrame
    :raises TypeError: If the arguments do not match pd.concat's signature.
    """
    # Resolve `objs` and `axis` however the caller passed them. Unlike the
    # deprecated inspect.getcallargs, inspect.signature follows decorator
    # wrappers, so the real parameter names are found even when pd.concat
    # itself is wrapped.
    bound = inspect.signature(pd.concat).bind(*args, **kwargs)
    bound.apply_defaults()
    objs = bound.arguments["objs"]
    axis = bound.arguments["axis"]
    # Create result, starting with pd.concat.
    res = mdf.MicroDataFrame(pd.concat(*args, **kwargs))
    # Assign weights depending on axis. pandas accepts the row axis either
    # as 0 or by name, so treat all spellings as vertical concatenation.
    if axis in (0, "index", "rows"):
        res.weights = pd.concat([obj.weights for obj in objs])
    else:
        # If concatenating horizontally, use the first set of weights.
        res.weights = objs[0].weights
    return res
Esempio n. 6
0
import pandas as pd
import pytest

import microdf as mdf

# Shared fixtures: three observations with values X and Y and weights W.
X = [1, 5, 2]
Y = [0, -6, 3]
W = [4, 1, 1]
df = pd.DataFrame({"x": X, "y": Y, "w": W})
ms = mdf.MicroSeries(X, weights=W)
md = mdf.MicroDataFrame(df[["x", "y"]], weights=W)
# Also make a version with groups.
df2 = df.copy(deep=True)
df2.x *= 2
df2.y *= 1.5
dfg = pd.concat([df, df2])
dfg["g"] = ["a"] * 3 + ["b"] * 3
# dfg stacks df on top of df2 and so has six rows; repeat W so there is one
# weight per row (this matches the values in dfg's own "w" column).
mdg = mdf.MicroDataFrame(dfg[["x", "y", "g"]], weights=W * 2)


def test_weighted_quantile():
    """Smoke test: weighted_quantile runs and its result has tolist()."""
    quantiles = [0, 0.5, 1]
    result = mdf.weighted_quantile(df, "x", "w", quantiles)
    # No value is asserted; this only checks that the calls succeed.
    result.tolist()


def test_weighted_median():
    """Check the median of x without weights; smoke-test the other forms."""
    no_weight_median = mdf.weighted_median(df, "x")
    assert no_weight_median == 2
    # With a weight column: only checks that the call succeeds.
    mdf.weighted_median(df, "x", "w")
    # With a weight column and a grouping column on the grouped frame.
    mdf.weighted_median(dfg, "x", "w", "g")
Esempio n. 7
0
 def df(self, cols, map_to="person"):
     """Build a MicroDataFrame with one calculated column per name in cols.

     :param cols: Iterable of variable names; each is passed to self.calc.
     :param map_to: Forwarded to self.calc as ``map_to`` and used as the
         key into self.weight_vars to pick the weights. Defaults to
         "person".
     :return: MicroDataFrame of the calculated columns, weighted by
         self.weight_vars[map_to].
     """
     columns = {name: self.calc(name, map_to=map_to) for name in cols}
     return mdf.MicroDataFrame(columns, weights=self.weight_vars[map_to])
Esempio n. 8
0
import microdf as mdf

import numpy as np
import pandas as pd

# Four observations; the "weight" column supplies the per-row weights used by
# the weighted checks below.
df = pd.DataFrame(
    {
        "income": [-10, 0, 10, 20],
        "threshold": [15, 10, 15, 10],
        "weight": [1, 2, 3, 4],
    }
)
# The same income/threshold data as a MicroDataFrame carrying those weights.
md = mdf.MicroDataFrame(df[["income", "threshold"]], weights=df["weight"])


def test_poverty_rate():
    """Check poverty_rate unweighted, weighted, and as a frame method."""
    # Without a weight column the expected rate is 3 / 4.
    unweighted = mdf.poverty_rate(df, "income", "threshold")
    assert np.allclose(unweighted, 3 / 4)
    # With the "weight" column the expected rate is 6 / 10.
    weighted = mdf.poverty_rate(df, "income", "threshold", "weight")
    assert np.allclose(weighted, 6 / 10)
    # The MicroDataFrame method must agree with the weighted function call.
    from_method = md.poverty_rate("income", "threshold")
    assert np.allclose(from_method, 6 / 10)


def test_deep_poverty_rate():
    """Check deep_poverty_rate unweighted, weighted, and as a frame method."""
    # Without a weight column the expected rate is 2 / 4.
    unweighted = mdf.deep_poverty_rate(df, "income", "threshold")
    assert np.allclose(unweighted, 2 / 4)
    # With the "weight" column the expected rate is 3 / 10.
    weighted = mdf.deep_poverty_rate(df, "income", "threshold", "weight")
    assert np.allclose(weighted, 3 / 10)
    # The MicroDataFrame method must agree with the weighted function call.
    from_method = md.deep_poverty_rate("income", "threshold")
    assert np.allclose(from_method, 3 / 10)

Esempio n. 9
0
def test_multiple_groupby():
    """Check that grouping by two columns and summing z gives 5 and 6."""
    frame = mdf.MicroDataFrame({"x": [1, 2], "y": [3, 4], "z": [5, 6]})
    z_totals = frame.groupby(["x", "y"]).z.sum()
    expected = np.array([5, 6])
    assert (z_totals == expected).all()
Esempio n. 10
0
def test_unweighted_groupby():
    """Check that grouping a frame built without weights sums z per x."""
    frame = mdf.MicroDataFrame({"x": [1, 2], "y": [3, 4], "z": [5, 6]})
    z_totals = frame.groupby("x").z.sum().values
    expected = np.array([5.0, 6.0])
    assert (z_totals == expected).all()
Esempio n. 11
0
def test_value_subset():
    """Check that boolean row filtering keeps weights the same shape."""
    frame = mdf.MicroDataFrame(
        {"x": [1, 2, 3], "y": [1, 2, 2]},
        weights=[4, 5, 6],
    )
    filtered = frame[frame.y > 1]
    # The kept rows and their weights must have matching shapes.
    assert filtered.y.shape == filtered.weights.shape
Esempio n. 12
0
def test_reset_index():
    """Check that reset_index() returns exactly a MicroDataFrame."""
    frame = mdf.MicroDataFrame({"x": [1, 2, 3]}, weights=[4, 5, 6])
    result = frame.reset_index()
    assert result.__class__ == MicroDataFrame
Esempio n. 13
0
def test_set_index():
    """Check a column is a MicroSeries before and after replacing the index."""
    frame = mdf.MicroDataFrame({"x": [1, 2, 3]}, weights=[4, 5, 6])
    assert frame.x.__class__ == MicroSeries
    # Assigning a new index must not change the column's class.
    frame.index = [1, 2, 3]
    assert frame.x.__class__ == MicroSeries
Esempio n. 14
0
    "in_deep_poverty_bhc",
]

BASELINE_HH_COLS = ["household_weight", "people", "region"]

# Extract these for baseline too.
REFORM_HH_COLS = [
    "household_net_income",
    "equiv_household_net_income",
    "poverty_gap_bhc",
    "poverty_gap_ahc",
]

# Person-level baseline frame; weights are given by the column name
# "household_weight".
p_base = mdf.MicroDataFrame(
    baseline_sim.df(
        BASELINE_PERSON_COLS + REFORM_PERSON_COLS, map_to="person"
    ),
    weights="household_weight",
)
# Add a "_base" suffix to every column named in REFORM_PERSON_COLS.
p_base.rename(
    {col: col + "_base" for col in REFORM_PERSON_COLS},
    axis=1,
    inplace=True,
)

# Household-level baseline frame, built the same way.
hh_base = mdf.MicroDataFrame(
    baseline_sim.df(BASELINE_HH_COLS + REFORM_HH_COLS, map_to="household"),
    weights="household_weight",
)
hh_base.rename(
    dict(zip(REFORM_HH_COLS, [i + "_base" for i in REFORM_HH_COLS])),
    axis=1,