def top_n(__data, n, wt=None): """Filter to keep the top or bottom entries in each group. Args: ___data: a DataFrame n: the number of rows to keep in each group wt: a column or expression that determines ordering (defaults to the last column in data) Examples: >>> from siuba import _, top_n >>> df = pd.DataFrame({'x': [3, 1, 2, 4], 'y': [1, 1, 0, 0]}) >>> top_n(df, 2, _.x) x y 0 3 1 3 4 0 >>> top_n(df, -2, _.x) x y 1 1 1 2 2 0 >>> top_n(df, 2, _.x*_.y) x y 0 3 1 1 1 1 """ # NOTE: using min_rank, since it can return a lazy expr for min_rank(ing) # but I would rather not have it imported in verbs. will be more # reasonable if each verb were its own file? need abstract verb / vector module. # vector imports experimental right now, so we need to invert deps # TODO: # * what if wt is a string? should remove all str -> expr in verbs like group_by etc.. # * verbs like filter allow lambdas, but this func breaks with that from .vector import min_rank if wt is None: sym_wt = getattr(Symbolic(MetaArg("_")), __data.columns[-1]) elif isinstance(wt, Call): sym_wt = Symbolic(wt) else: raise TypeError("wt must be a symbolic expression, eg. _.some_col") if n > 0: return filter(__data, min_rank(-sym_wt) <= n) else: return filter(__data, min_rank(sym_wt) <= abs(n))
def big_print(d):
    """Print every key/value pair of *d*, then the Symbolic form of d["siuba"].

    Debug helper: each entry is printed as the key, a ``--`` marker, the raw
    value, and a blank separator line. Finally the entry stored under the
    ``"siuba"`` key is wrapped in ``Symbolic`` and printed.
    """
    from siuba.siu import Symbolic

    print()
    for key in d:
        print(key, "--")
        print(d[key])
        print()
    print("symbol --")
    print(Symbolic(d["siuba"]))
def test_sym_slice():
    """A single-condition symbolic slice lowers to a _SliceOpIndex call."""
    from siuba.siu import _SliceOpIndex

    s = Symbolic()
    expr = s[s < 1]

    meta_arg, op = strip_symbolic(expr).args
    assert isinstance(meta_arg, MetaArg)
    assert isinstance(op, Call)
    # SliceOp uses an abc metaclass, so the concrete class registers as one
    assert isinstance(op, SliceOp)
    assert op.__class__ is _SliceOpIndex

    # calling the op evaluates the condition — presumably (_ < 1) with _ = 1,
    # which is False
    result = op(1)
    assert result is False
def test_sym_slice_multiple():
    """A multi-argument symbolic slice lowers to a _SliceOpExt call."""
    from siuba.siu import _SliceOpExt

    s = Symbolic()
    expr = s[s < 1, :, :]

    meta_arg, op = strip_symbolic(expr).args
    assert isinstance(meta_arg, MetaArg)
    assert len(op.args) == 3
    assert isinstance(op.args[0], Call)
    # SliceOp uses an abc metaclass, so the concrete class registers as one
    assert isinstance(op, SliceOp)
    assert op.__class__ is _SliceOpExt

    # evaluating the op — presumably with _ = 1 — yields one entry per slice
    # argument: the condition result, then the bare slices
    result = op(1)
    assert result[0] is False
    assert result[1] == slice(None)
from siuba.siu import Symbolic, strip_symbolic

# TODO: dot, corr, cov
#       ordered set aggregate. e.g. mode()
#       hypothetical-set aggregate (e.g. rank(a) as if it were in partition(order_by b))
# kinds of windows:
#   * result len n_elements: rank()
#   * result len 1: is_monotonic (lag, diff, and any). ordered set aggregate.
#   * result len input len: percentile_cont([.1, .2]). hypo set aggregate.

_ = Symbolic()


class Result:
    """Base class tagging a spec entry with a result kind and its options.

    Subclasses carry the kind in their class name; arbitrary keyword options
    are stored as-is on the instance.
    """

    def __init__(self, **kwargs):
        # keep every keyword option verbatim for later serialization
        self.options = kwargs

    def to_dict(self):
        """Serialize as a dict with the subclass name under 'type'."""
        out = {'type': type(self).__name__}
        out.update(self.options)
        return out


class Elwise(Result):
    # element-wise result: one output per input element
    pass


class Agg(Result):
    # aggregate result: reduces a group to a single value
    pass


class Window(Result):
    # windowed result: computed over a window of the group
    pass
def _():
    """Build and return a fresh Symbolic expression root."""
    fresh = Symbolic()
    return fresh
    # NOTE(review): tail of a function whose `def` line is outside this chunk
    # — it returns the title-cased 'kind' (falling back to 'status') of an
    # action dict, mirroring the lookup in filter_on_result below.
    return action.get('kind', action.get('status')).title()


def filter_on_result(spec, types):
    """Return the spec keys whose action kind/status is in *types*.

    Each spec value is expected to hold an 'action' dict; its 'kind' (or,
    failing that, 'status') is title-cased and matched against *types*.
    NOTE(review): if both 'kind' and 'status' are absent this raises
    AttributeError (None.title()) — presumably the spec guarantees one is set.
    """
    return [
        k for k, v in spec.items()
        if v['action'].get('kind', v['action'].get('status')).title() in types
    ]


# Partition the (module-level, off-screen) `spec` by implementation status.
SPEC_IMPLEMENTED = filter_on_result(spec, {"Agg", "Elwise", "Window"})
SPEC_NOTIMPLEMENTED = filter_on_result(
    spec,
    {"Singleton", "Wontdo", "Todo", "Maydo"}
)
SPEC_AGG = filter_on_result(spec, {"Agg"})

_ = Symbolic()


@pytest.fixture(params=tuple(SPEC_IMPLEMENTED))
def entry(request):
    # NOTE: the sole purpose of putting in a fixture is so pytest line output
    # is very easy to read. (e.g. pytest -v --tb=line)
    key = request.param
    yield spec[key]


@pytest.fixture(params=tuple(SPEC_AGG))
def agg_entry(request):
    # parametrized over aggregate-only spec entries; yields the spec value
    key = request.param
    yield spec[key]