예제 #1
0
def top_n(__data, n, wt=None):
    """Filter to keep the top or bottom entries in each group.

    Rows are ranked with ``min_rank`` over ``wt``. When ``n`` is positive the
    ranking is computed on the negated expression and rows ranked ``<= n`` are
    kept; otherwise the ranking is computed on the expression itself and rows
    ranked ``<= abs(n)`` are kept.

    Args:
        __data: a DataFrame
        n: the number of rows to keep in each group
        wt: a column or expression that determines ordering (defaults to the
            last column in data)

    Raises:
        TypeError: if ``wt`` is neither ``None`` nor a ``Call``.

    Examples:
        >>> from siuba import _, top_n
        >>> df = pd.DataFrame({'x': [3, 1, 2, 4], 'y': [1, 1, 0, 0]})
        >>> top_n(df, 2, _.x)
           x  y
        0  3  1
        3  4  0

        >>> top_n(df, -2, _.x)
           x  y
        1  1  1
        2  2  0

        >>> top_n(df, 2, _.x*_.y)
           x  y
        0  3  1
        1  1  1

    """
    # NOTE: min_rank is used because it can return a lazy expression, but it is
    #       imported locally to keep it out of the verbs module namespace.
    #       The vector module imports experimental code right now, so the
    #       dependency direction still needs to be inverted (an abstract
    #       verb / vector module, or one file per verb, would help).
    # TODO:
    #   * decide what to do when wt is a string (str -> expr conversion should
    #     be removed from verbs like group_by as well)
    #   * verbs like filter accept lambdas, but this function does not
    from .vector import min_rank

    if wt is None:
        # Fall back to ordering by the last column of the data.
        last_column = __data.columns[-1]
        ranking_expr = getattr(Symbolic(MetaArg("_")), last_column)
    elif isinstance(wt, Call):
        ranking_expr = Symbolic(wt)
    else:
        raise TypeError("wt must be a symbolic expression, eg. _.some_col")

    # Build the row predicate once, then apply it with a single filter call.
    if n > 0:
        keep_rows = min_rank(-ranking_expr) <= n
    else:
        keep_rows = min_rank(ranking_expr) <= abs(n)

    return filter(__data, keep_rows)
예제 #2
0
파일: __init__.py 프로젝트: pythseq/wrestlr
def big_print(d):
    """Print every key/value pair of ``d``, then ``d["siuba"]`` as a Symbolic.

    Each entry is written as the key followed by ``--``, the value on the next
    line, and a blank line. A final ``symbol --`` section prints
    ``Symbolic(d["siuba"])``.

    Args:
        d: a mapping that contains at least the key ``"siuba"``.
    """
    from siuba.siu import Symbolic

    # Leading blank line separates this dump from earlier output.
    print()
    for label, content in d.items():
        print(label, "--")
        print(content)
        print()

    wrapped = Symbolic(d["siuba"])
    print("symbol --")
    print(wrapped)
예제 #3
0
파일: test_siu.py 프로젝트: machow/siuba
def test_sym_slice():
    """Indexing a Symbolic with one expression produces a ``_SliceOpIndex``."""
    from siuba.siu import _SliceOpIndex

    _ = Symbolic()

    stripped = strip_symbolic(_[_ < 1])
    meta_arg, slice_call = stripped.args

    assert isinstance(meta_arg, MetaArg)
    assert isinstance(slice_call, Call)
    # SliceOp membership is established through its abc metaclass.
    assert isinstance(slice_call, SliceOp)
    assert slice_call.__class__ is _SliceOpIndex

    # Calling the slice operation with 1 must give back the literal False.
    result = slice_call(1)
    assert result is False
예제 #4
0
파일: test_siu.py 프로젝트: machow/siuba
def test_sym_slice_multiple():
    """Indexing a Symbolic with several items produces a ``_SliceOpExt``."""
    from siuba.siu import _SliceOpExt

    _ = Symbolic()

    stripped = strip_symbolic(_[_ < 1, :, :])
    meta_arg, slice_call = stripped.args

    assert isinstance(meta_arg, MetaArg)
    # One argument per item inside the brackets.
    assert len(slice_call.args) == 3
    assert isinstance(slice_call.args[0], Call)
    # SliceOp membership is established through its abc metaclass.
    assert isinstance(slice_call, SliceOp)
    assert slice_call.__class__ is _SliceOpExt

    result = slice_call(1)
    assert result[0] is False
    assert result[1] == slice(None)
예제 #5
0
파일: series.py 프로젝트: mcbarlowe/siuba
from siuba.siu import Symbolic, strip_symbolic
# TODO: dot, corr, cov
# ordered set aggregate. e.g. mode()
# hypothetical-set aggregate (e.g. rank(a) as if it were in partition(order_by b))

# kinds of windows:
#   * result len n_elements: rank()
#   * result len 1: is_monotonic (lag, diff, and any). ordered set aggregate.
#   * result len input len: percentile_cont([.1, .2]). hypo set aggregate.

# Module-level Symbolic instance bound to `_`.
# NOTE(review): presumably the placeholder used to write lazy column
# expressions further down in this module - confirm against its usages.
_ = Symbolic()


class Result:
    """Container for arbitrary keyword options, tagged with its class name."""

    def __init__(self, **kwargs):
        """Store every keyword argument in ``self.options``."""
        self.options = kwargs

    def to_dict(self):
        """Return a new dict with a ``'type'`` entry followed by the options.

        ``'type'`` holds the name of the instance's class. An option that is
        itself named ``'type'`` overrides that value.
        """
        payload = {'type': self.__class__.__name__}
        payload.update(self.options)
        return payload


class Elwise(Result):
    """Result subclass; ``to_dict()`` reports its type as ``'Elwise'``."""


class Agg(Result):
    """Result subclass; ``to_dict()`` reports its type as ``'Agg'``."""


class Window(Result):
    """Result subclass; ``to_dict()`` reports its type as ``'Window'``."""
예제 #6
0
파일: test_siu.py 프로젝트: machow/siuba
def _():
    """Return a new ``Symbolic`` instance."""
    return Symbolic()


def filter_on_result(spec, types):
    """Return the keys of ``spec`` whose action label is in ``types``.

    The label of an entry is its action's ``'kind'`` value, or the action's
    ``'status'`` value when ``'kind'`` is absent, converted with
    ``str.title()``.

    Args:
        spec: mapping of name -> entry, where each entry has an ``'action'``
            mapping.
        types: container of title-cased labels to keep.

    Returns:
        A list of matching keys, in the iteration order of ``spec``.
    """
    selected = []
    for name, entry in spec.items():
        action = entry['action']
        label = action.get('kind', action.get('status'))
        if label.title() in types:
            selected.append(name)
    return selected


# Spec keys grouped by the title-cased 'kind' (or, when 'kind' is absent,
# 'status') of each entry's action, as computed by filter_on_result.
SPEC_IMPLEMENTED = filter_on_result(spec, {"Agg", "Elwise", "Window"})
SPEC_NOTIMPLEMENTED = filter_on_result(
    spec, {"Singleton", "Wontdo", "Todo", "Maydo"})
# Only the "Agg" entries; these keys parametrize the agg_entry fixture.
SPEC_AGG = filter_on_result(spec, {"Agg"})

# Module-level Symbolic instance bound to `_`.
_ = Symbolic()


@pytest.fixture(params=tuple(SPEC_IMPLEMENTED))
def entry(request):
    """Yield the ``spec`` entry for each key in ``SPEC_IMPLEMENTED``."""
    # NOTE: this lives in a fixture only so that pytest's line output stays
    #       very easy to read (e.g. pytest -v --tb=line).
    yield spec[request.param]


@pytest.fixture(params=tuple(SPEC_AGG))
def agg_entry(request):
    """Yield the ``spec`` entry for each key in ``SPEC_AGG``."""
    yield spec[request.param]