def test_rank():
    """Weighted rank equals the cumulative weight up to each value,
    regardless of the order the values arrive in."""
    for values, weights, expected in [
        ([1, 2, 3], [4, 5, 6], [4, 9, 15]),
        ([3, 1, 2], [6, 4, 5], [15, 4, 9]),
        ([2, 1, 3], [5, 4, 6], [9, 4, 15]),
    ]:
        series = mdf.MicroSeries(values, weights=weights)
        assert np.array_equal(series.rank().values, expected)
def test_cumsum():
    """Weighted cumulative sum accumulates value*weight in input order."""
    for values, weights, expected in [
        ([1, 2, 3], [4, 5, 6], [4, 14, 32]),
        ([2, 1, 3], [5, 4, 6], [10, 14, 32]),
        ([3, 1, 2], [6, 4, 5], [18, 22, 32]),
    ]:
        series = mdf.MicroSeries(values, weights=weights)
        assert np.array_equal(series.cumsum().values, expected)
def test_top_pct():
    """Top-N%-share matches hand-computed fractions, for both the
    DataFrame helper and the MicroSeries method."""
    # 1..10: total = 55, the top 10% is the single value 10.
    values = list(range(1, 11))
    frame = pd.DataFrame({"x": values})
    series = mdf.MicroSeries(values)
    expected = 10 / 55
    assert mdf.top_10_pct_share(frame, "x") == expected
    assert series.top_10_pct_share() == expected

    # Self-weighted 1..3 is equivalent to [1, 2, 2, 3, 3, 3]:
    # total = 14, the top half sums to 9.
    values = list(range(1, 4))
    frame = pd.DataFrame({"x": values, "w": values})
    series = mdf.MicroSeries(values, weights=values)
    expected = 9 / 14
    assert mdf.top_50_pct_share(frame, "x", "w") == expected
    assert series.top_50_pct_share() == expected
def test_series_getitem():
    """Fancy indexing and slicing keep values and weights aligned."""
    values = np.array([0, 1, 1])
    weights = np.array([3, 0, 9])
    series = mdf.MicroSeries(values, weights=weights)
    idx = [1, 2]
    assert series[idx].sum() == np.sum(values[idx] * weights[idx])
    assert series[1:3].sum() == np.sum(values[1:3] * weights[1:3])
def test_mean():
    """Weighted mean agrees with np.average, and mismatched weight
    lengths are rejected by set_weights."""
    arr = np.array([3, 0, 2])
    w = np.array([4, 1, 1])
    series = mdf.MicroSeries(arr, weights=w)
    assert series.mean() == np.average(arr, weights=w)

    arr = np.linspace(-20, 100, 100)
    w = np.linspace(1, 3, 100)
    series = mdf.MicroSeries(arr)
    series.set_weights(w)
    assert series.mean() == np.average(arr, weights=w)

    # Weights of a different length than the values must raise.
    # BUGFIX: the old pattern put `assert False` inside the try, so the
    # AssertionError was swallowed by `except Exception` and the negative
    # test could never fail. Track success with a flag instead.
    w = np.linspace(1, 3, 101)
    series = mdf.MicroSeries(arr)
    raised = False
    try:
        series.set_weights(w)
    except Exception:
        raised = True
    assert raised, "set_weights should reject mismatched-length weights"
def test_sum():
    """Weighted sum agrees with (values * weights).sum(), and mismatched
    weight lengths are rejected by set_weights."""
    arr = np.array([0, 1, 1])
    w = np.array([3, 0, 9])
    series = mdf.MicroSeries(arr, weights=w)
    assert series.sum() == (arr * w).sum()

    arr = np.linspace(-20, 100, 100)
    w = np.linspace(1, 3, 100)
    series = mdf.MicroSeries(arr)
    series.set_weights(w)
    assert series.sum() == (arr * w).sum()

    # Verify that an error is thrown when passing weights of different size
    # from the values.
    # BUGFIX: the old pattern put `assert False` inside the try, so the
    # AssertionError was swallowed by `except Exception` and the negative
    # test could never fail. Track success with a flag instead.
    w = np.linspace(1, 3, 101)
    series = mdf.MicroSeries(arr)
    raised = False
    try:
        series.set_weights(w)
    except Exception:
        raised = True
    assert raised, "set_weights should reject mismatched-length weights"
def calc(self, var, map_to=None, period="2020", verbose=False):
    """Calculate a variable for the given period, falling back to
    summing (calculate_add) and then dividing (calculate_divide) across
    sub-periods when the variable is not defined for the period itself.

    :param var: name of the variable to calculate.
    :param map_to: target entity key to map the result onto; defaults to
        the variable's own entity.
    :param period: period string passed to the simulation (default "2020").
    :param verbose: if True, print a note each time a fallback is used.
    :returns: a MicroSeries weighted by the target entity's weights, or
        the raw result when microdf usage is disabled or no weights exist.
    """
    # BUGFIX: the original used bare `except:`, which also swallows
    # KeyboardInterrupt/SystemExit; narrow to Exception.
    try:
        result = self.model.calculate(var, period)
    except Exception:
        if verbose:
            print(
                f"Initial period calculation failed for {var}; attempting to gross up periods"
            )
        try:
            result = self.model.calculate_add(var, period)
        except Exception:
            if verbose:
                print(
                    f"Grossing up period calculation failed for {var}; attempting to divide period"
                )
            result = self.model.calculate_divide(var, period)
    entity = self.model.tax_benefit_system.variables[var].entity.key
    result = self.map_to(result, entity=entity, target_entity=map_to)
    if not self.use_microdf or self.weight_vars is None:
        return result
    return mdf.MicroSeries(
        result, weights=self.weight_vars[map_to or entity]
    )
def calc(
    self,
    var: str,
    period: Union[str, int] = 2020,
    weighted: bool = True,
    map_to: str = None,
    how: str = None,
    dp: int = 2,
) -> MicroSeries:
    """Calculate a variable, with period fallbacks and optional weighting.

    Falls back from calculate to calculate_add to calculate_divide; if
    all three fail, re-raises the first error. Floats are rounded to
    ``dp`` places, Enums decoded to strings, and bools grossed up over
    52 sub-periods are re-binarised.

    :param var: variable name to calculate.
    :param period: period to calculate for; ``None`` uses ``self.year``.
    :param weighted: if False, return the raw array unweighted.
    :param map_to: optional entity key to map the result onto.
    :param how: mapping method forwarded to ``self.map_to``.
    :param dp: decimal places for float results.
    :returns: a weighted MicroSeries, or the raw array if not weighted.
    """
    if period is None:
        period = self.year
    # BUGFIX: look up the metadata before the try block. Previously a
    # missing variable raised inside the try, and the fallback handler
    # then hit a NameError on `var_metadata` instead of the real error.
    var_metadata = self.simulation.tax_benefit_system.variables[var]
    try:
        arr = self.simulation.calculate(var, period)
    except Exception as e:
        # BUGFIX: bare `except:` narrowed to Exception throughout.
        try:
            arr = self.simulation.calculate_add(var, period)
            if var_metadata.value_type == bool:
                # A yearly sum of 52 weekly booleans: treat "true all
                # year" (>= 52) as True.
                arr = arr >= 52
        except Exception:
            try:
                arr = self.simulation.calculate_divide(var, period)
            except Exception:
                # All fallbacks failed: surface the original error.
                raise e
    if var_metadata.value_type == float:
        arr = arr.round(dp)
    if var_metadata.value_type == Enum:
        arr = arr.decode_to_str()
    if not weighted:
        return arr
    else:
        entity = var_metadata.entity.key
        if map_to:
            arr = self.map_to(arr, entity, map_to, how=how)
            entity = map_to
        return mdf.MicroSeries(arr, weights=self.entity_weights[entity])
import pandas as pd
import pytest

import microdf as mdf

# Shared fixtures: a small frame with values, a second column, and weights.
X = [1, 5, 2]
Y = [0, -6, 3]
W = [4, 1, 1]
df = pd.DataFrame({"x": X, "y": Y, "w": W})
ms = mdf.MicroSeries(X, weights=W)
md = mdf.MicroDataFrame(df[["x", "y"]], weights=W)

# Also make a version with groups.
df2 = df.copy(deep=True)
df2.x *= 2
df2.y *= 1.5
dfg = pd.concat([df, df2])
dfg["g"] = ["a"] * 3 + ["b"] * 3
mdg = mdf.MicroDataFrame(dfg[["x", "y", "g"]], weights=W)


def test_weighted_quantile():
    """Smoke test: weighted_quantile runs over the full [0, 1] range."""
    quantiles = [0, 0.5, 1]
    mdf.weighted_quantile(df, "x", "w", quantiles).tolist()


def test_weighted_median():
    """Unweighted median of [1, 5, 2] is 2; weighted and grouped calls
    are exercised as smoke tests."""
    assert mdf.weighted_median(df, "x") == 2
    mdf.weighted_median(df, "x", "w")
    # Test with groups.
    mdf.weighted_median(dfg, "x", "w", "g")
def test_median():
    """Weighted median: weights expand to 1, 2, 3, 4, *4*, 4, 5, 5, 5,
    whose middle element is 4."""
    values = np.array([1, 2, 3, 4, 5])
    weights = np.array([1, 1, 1, 3, 3])
    series = mdf.MicroSeries(values, weights=weights)
    assert series.median() == 4
def test_decile_rank_rank():
    """Decile ranks follow cumulative weight shares.

    BUGFIX: the weights list had 10 entries for 9 values; the trailing
    extra weight is removed so lengths match (MicroSeries rejects
    mismatched weights). Total weight is 100, so cumulative shares land
    exactly on decile boundaries and yield the expected ranks.
    """
    s = mdf.MicroSeries(
        [5, 4, 3, 2, 1, 6, 7, 8, 9],
        weights=[10, 20, 10, 10, 10, 10, 10, 10, 10],
    )
    assert np.array_equal(s.decile_rank().values, [6, 5, 3, 2, 1, 7, 8, 9, 10])
def test_quintile_rank():
    """Quintile ranks from cumulative weight shares: 2 covers 60% (Q3),
    3 reaches 80% (Q4), 4 reaches 100% (Q5)."""
    series = mdf.MicroSeries([4, 2, 3], weights=[20, 60, 20])
    expected = [5, 3, 4]
    assert np.array_equal(series.quintile_rank().values, expected)
def test_quartile_rank():
    """Quartile ranks from cumulative weight shares: 2 covers 50% (Q2),
    3 reaches 75% (Q3), 4 reaches 100% (Q4)."""
    series = mdf.MicroSeries([4, 2, 3], weights=[25, 50, 25])
    expected = [4, 2, 3]
    assert np.array_equal(series.quartile_rank().values, expected)
def test_percentile_rank():
    """Percentile ranks from cumulative weight shares: 1 covers 20%,
    2 reaches 60%, 3 reaches 80%, 4 reaches 100%."""
    series = mdf.MicroSeries([4, 2, 3, 1], weights=[20, 40, 20, 20])
    expected = [100, 60, 80, 20]
    assert np.array_equal(series.percentile_rank().values, expected)