def _call_strip_ascending(f):
    """Split an expression into ``(expression, ascending)``.

    A ``Symbolic`` is first reduced with ``strip_symbolic``. When the result
    is a ``Call`` whose ``func`` is ``"__neg__"``, the negated operand
    (``args[0]``) is returned together with ``False``. Anything else is
    returned as-is together with ``True``.
    """
    stripped = strip_symbolic(f) if isinstance(f, Symbolic) else f

    is_negated = isinstance(stripped, Call) and stripped.func == "__neg__"
    if is_negated:
        return stripped.args[0], False

    return stripped, True
def test_slice_call_returns(_, expr, target):
    """Evaluate ``expr`` with ``_`` in scope and check what its slice call returns.

    The stripped expression's ``args`` are unpacked into exactly two items;
    the second one is called on a small dict and compared with ``target``.
    """
    stripped = strip_symbolic(eval(expr, {"_": _}))

    # first item is the operand being indexed; only the slice call is exercised
    index_op, slice_call = stripped.args

    assert slice_call({'a': 1, 'b': 2}) == target
def wrapper(*args, **kwargs):
    """Strip symbolics from every argument, then forward to ``dispatch_func``.

    With no positional arguments, a ``NoArgs()`` instance is passed as the
    sole positional argument instead.
    """
    stripped_kwargs = {name: strip_symbolic(value) for name, value in kwargs.items()}

    if args:
        # positional arguments are stripped lazily, as the call unpacks them
        return dispatch_func(*map(strip_symbolic, args), **stripped_kwargs)

    return dispatch_func(NoArgs(), **stripped_kwargs)
def __rrshift__(self, x):
    """Handle ``x >> pipe`` (pipe on the right-hand side).

    A ``Symbolic``/``Call`` (stripped first) or any other callable is
    prepended to this pipe's calls, producing a new ``Pipeable``. Any other
    value is piped eagerly: the result of ``self(x)`` is returned.
    """
    if isinstance(x, (Symbolic, Call)):
        head = strip_symbolic(x)
    elif callable(x):
        head = x
    else:
        return self(x)

    return Pipeable(calls = [head] + self.calls)
def test_lazy_tbl_shape_call_error(db):
    """Shaping a call to an unknown method raises ``SqlFunctionLookupError``.

    Also checks that the raised exception has its context suppressed.
    """
    tbl = LazyTbl(db, 'addresses')
    call = strip_symbolic(_.id.asdkfjsdf())

    with pytest.raises(SqlFunctionLookupError) as err:
        tbl.shape_call(call)

    # suppresses context for shorter stack trace.
    # `err` is pytest's ExceptionInfo wrapper; the raised exception itself is
    # `err.value`. The check must run after the `with` block, since nothing
    # following the raising statement inside it is ever executed.
    assert err.value.__suppress_context__ is True
def test_pandas_grouped_frame_fast_not_implemented(notimpl_entry):
    """``fast_mutate`` raises ``NotImplementedError`` for the entry's frame expression."""
    from siuba.experimental.pd_groups.dialect import fast_mutate

    accessor = notimpl_entry['accessor']
    gdf = data[accessor].groupby('g')

    # TODO: once reading from yaml, no need to repr
    str_expr = str(notimpl_entry['expr_frame'])
    call_expr = strip_symbolic(eval(str_expr, {'_': _}))

    with pytest.raises(NotImplementedError):
        fast_mutate(gdf, result=call_expr)
def __init__(self, f = None, calls = None):
    """Create a pipe from a single step ``f`` or from a list of ``calls``.

    Parameters:
        f: a single step; it is passed through ``strip_symbolic`` first.
        calls: the steps to store directly on ``self.calls``.

    Raises:
        ValueError: if both ``f`` and ``calls`` are given.

    Note: when neither is given, ``self.calls`` is left as ``None``.
    """
    # symbolics like _.some_attr need to be stripped down to a call, because
    # calling _.some_attr() returns another symbolic.
    f = strip_symbolic(f)

    if f is None:
        self.calls = calls
        return

    if calls is not None:
        # ValueError is an Exception subclass, so callers that catch the
        # previously raised bare Exception keep working.
        raise ValueError(
            "Pipeable accepts either `f` or `calls`, but both were given."
        )

    self.calls = [f]
def __rshift__(self, x):
    """Handle ``pipe >> x`` (pipe on the left-hand side, lazy piping).

    Returns a new ``Pipeable`` whose calls are this pipe's calls followed by:
      * the calls of ``x``, when ``x`` is a ``Pipeable``;
      * the stripped call, when ``x`` is a ``Symbolic`` or ``Call``;
      * ``x`` itself, when it is any other callable.

    Raises:
        TypeError: if ``x`` is none of the above.
    """
    if isinstance(x, Pipeable):
        appended = x.calls
    elif isinstance(x, (Symbolic, Call)):
        appended = [strip_symbolic(x)]
    elif callable(x):
        appended = [x]
    else:
        # TypeError is an Exception subclass, so callers that catch the
        # previously raised bare Exception keep working.
        raise TypeError(
            "Cannot pipe into object of type {!r}; expected a Pipeable, "
            "Symbolic, Call, or other callable.".format(type(x).__name__)
        )

    return Pipeable(calls = self.calls + appended)
def test_frame_expr(entry):
    """The stripped call applied to a frame matches evaluating the expression on it directly."""
    # TODO: remove this test, not checking anything new
    df = data[entry['accessor']]

    # TODO: once reading from yaml, no need to repr
    str_expr = str(entry['expr_frame'])

    # same expression, run two ways: as a stripped call, and with _ bound to df
    res = strip_symbolic(eval(str_expr, {'_': _}))(df)
    dst = eval(str_expr, {'_': df})

    assert res.__class__ is dst.__class__
    assert_src_array_equal(res, dst)
def case_when(__data, cases):
    """Build an array by applying condition -> value cases to ``__data``.

    ``cases`` is a mapping (or a ``Call`` that yields one when called on
    ``__data``). Keys and values are passed through ``strip_symbolic``.
    Cases are applied from last to first, so an earlier case overwrites a
    later one wherever both apply. Positions matched by no case stay ``None``.
    """
    if isinstance(cases, Call):
        cases = cases(__data)

    # TODO: handle when receive list of (k,v) pairs for py < 3.5 compat?
    stripped_cases = {
        strip_symbolic(cond): strip_symbolic(val) for cond, val in cases.items()
    }

    n = len(__data)
    out = np.repeat(None, n)

    for cond, val in reversed(list(stripped_cases.items())):
        if callable(cond):
            # evaluate the condition, then fill only the positions it selects
            mask = _val_call(cond, __data, n)
            indx = np.where(mask)[0]
            out[indx] = _val_call(val, __data, n, indx)
            continue

        if cond:
            # e.g. cond is just True, etc.. -- fill every position
            out[:] = _val_call(val, __data, n)

    # by recreating an array, attempts to cast as best dtype
    return np.array(list(out))
def test_vector_unary_and_dispatch(x, v_func, res):
    """``v_func`` gives the same result on data, via a Symbolic, and via a Call."""
    target = res if isinstance(res, Series) else Series(res)

    # plain data argument
    assert_series_equal(v_func(x), target)

    # symbolic call argument
    from_symbolic = v_func(_)
    assert isinstance(from_symbolic, Symbolic)
    assert_series_equal(from_symbolic(x), target)

    # call argument
    from_call = v_func(strip_symbolic(_))
    assert isinstance(from_call, Call)
    assert_series_equal(from_call(x), target)
def test_sym_slice():
    """Indexing a symbol with a single expression yields a ``_SliceOpIndex`` call."""
    from siuba.siu import _SliceOpIndex

    _ = Symbolic()
    stripped = strip_symbolic(_[_ < 1])

    meta, slice_ops = stripped.args
    assert isinstance(meta, MetaArg)

    assert isinstance(slice_ops, Call)
    assert isinstance(slice_ops, SliceOp)
    # abc metaclass
    assert slice_ops.__class__ is _SliceOpIndex

    # calling with 1 evaluates `1 < 1`
    assert slice_ops(1) is False
def test_series_against_call(entry):
    """The stripped call applied to ``df.x`` matches evaluating the expression on it directly.

    Entries whose action kind is ``"window"`` are skipped.
    """
    is_window = entry['action']['kind'] == "window"
    if is_window:
        pytest.skip()

    series = data[entry['accessor']].x

    # TODO: once reading from yaml, no need to repr
    str_expr = str(entry['expr_series'])

    res = strip_symbolic(eval(str_expr, {'_': _}))(series)
    dst = eval(str_expr, {'_': series})

    assert res.__class__ is dst.__class__
    assert_src_array_equal(res, dst)
def test_pandas_grouped_frame_fast_not_implemented(notimpl_entry):
    """``fast_mutate`` raises ``NotImplementedError`` for the entry's frame expression.

    Property entries whose status is "todo", "maydo" or "wontdo" are marked
    as expected failures instead.
    """
    from siuba.experimental.pd_groups.dialect import fast_mutate

    accessor = notimpl_entry['accessor']
    gdf = data[accessor].groupby('g')

    # TODO: once reading from yaml, no need to repr
    str_expr = str(notimpl_entry['expr_frame'])
    call_expr = strip_symbolic(eval(str_expr, {'_': _}))

    status = notimpl_entry['action']['status']
    if status in ["todo", "maydo", "wontdo"] and notimpl_entry["is_property"]:
        pytest.xfail()

    with pytest.raises(NotImplementedError):
        fast_mutate(gdf, result=call_expr)
def test_sym_slice_multiple():
    """Indexing a symbol with several items yields a ``_SliceOpExt`` call."""
    from siuba.siu import _SliceOpExt

    _ = Symbolic()
    stripped = strip_symbolic(_[_ < 1, :, :])

    meta, slice_ops = stripped.args
    assert isinstance(meta, MetaArg)

    # one argument per indexing item; the first is itself a call
    assert len(slice_ops.args) == 3
    assert isinstance(slice_ops.args[0], Call)

    assert isinstance(slice_ops, SliceOp)
    # abc metaclass
    assert slice_ops.__class__ is _SliceOpExt

    indexer = slice_ops(1)
    first, second = indexer[0], indexer[1]
    assert first is False
    assert second == slice(None)
def test_symbolic_dispatch():
    """A ``symbolic_dispatch`` function returns lazy objects when given Call/Symbolic input."""
    @symbolic_dispatch
    def f(x, y=2):
        return x + y

    # w/ simple Call
    from_call = f(strip_symbolic(_), 3)
    assert isinstance(from_call, Call)
    assert from_call(2) == 5

    # w/ simple Symbol
    from_symbol = f(_, 3)
    assert isinstance(from_symbol, Symbolic)
    assert from_symbol(2) == 5

    # w/ complex Call
    from_getitem = f(_['a'], 3)
    assert from_getitem({'a': 2}) == 5
def enrich_spec_entry(entry):
    """Return a copy of ``entry`` extended with details derived from its example.

    ``entry["example"]`` is evaluated with ``_`` in scope and stripped to a
    call. The copy gains the keys ``is_property``, ``expr_frame``,
    ``expr_series`` and ``accessor``, and its ``action`` mapping gains
    ``data_arity``. ``entry`` itself is not modified.
    """
    accessors = ['str', 'dt', 'cat', 'sparse']
    expr = strip_symbolic(eval(entry["example"], {"_": _}))

    # first accessor name found among the expression's op vars, else None
    matches = [ameth for ameth in accessors if ameth in expr.op_vars()]
    accessor = matches[0] if matches else None

    enriched = dict(entry)
    enriched.update(
        is_property = expr.func == "__getattr__",
        expr_frame = replace_meta_args(expr, _.x, _.y, _.z),
        expr_series = expr,
        accessor = accessor,
    )

    enriched['action'] = dict(
        entry['action'],
        data_arity = count_call_type(expr, MetaArg),
    )

    return enriched
def test_pandas_grouped_frame_fast_not_implemented(notimpl_entry):
    """``fast_mutate`` emits a ``UserWarning`` for the entry's frame expression.

    Property entries whose status is "todo", "maydo" or "wontdo" are marked
    as expected failures instead.
    """
    from siuba.experimental.pd_groups.dialect import fast_mutate

    gdf = data[notimpl_entry['accessor']].groupby('g')

    # TODO: once reading from yaml, no need to repr
    str_expr = str(notimpl_entry['expr_frame'])
    call_expr = strip_symbolic(eval(str_expr, {'_': _}))

    if notimpl_entry['action']['status'] in [
            "todo", "maydo", "wontdo"
            ] and notimpl_entry["is_property"]:
        pytest.xfail()

    with pytest.warns(UserWarning):
        try:
            # not implemented functions are punted to apply, and
            # not guaranteed to work (e.g. many lengthen arrays, etc..)
            fast_mutate(gdf, result=call_expr)
        except Exception:
            # Deliberately best-effort: only the warning matters here.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still propagate.
            pass
def get_type_info(call):
    """Describe a spec expression of the form ``<expr> >> <result>``.

    ``call`` is stripped to a call whose two arguments are the expression
    and its result object.

    Returns:
        dict with keys ``expr_series``, ``expr_frame``, ``accessor``,
        ``result``, ``is_property`` and ``data_arity``.

    Raises:
        ValueError: if the top-level ``func`` is a string other than
            ``"__rshift__"``.
    """
    call = strip_symbolic(call)

    func = call.func
    if isinstance(func, str) and func != "__rshift__":
        raise ValueError("Expected first expressions was >>")

    expr, result = call.args

    # first accessor name found among the expression's op vars; the trailing
    # None makes the [0] lookup below fall back to None when nothing matches
    accessors = ['str', 'dt', 'cat', 'sparse']
    accessor = [ameth for ameth in accessors if ameth in expr.op_vars()] + [None]

    return dict(
        expr_series = expr,
        expr_frame = replace_meta_args(expr, _.x, _.y, _.z),
        accessor = accessor[0],
        result = result.to_dict(),
        is_property = expr.func == "__getattr__",
        data_arity = count_call_type(expr, MetaArg)
    )
def test_call_tree_local_sub_attr_property_missing(ctl):
    """Entering ``_.str.f_b`` (property access) raises ``FunctionLookupError``."""
    # subattr raises lookup errors (property)
    missing_property = strip_symbolic(_.str.f_b)

    with pytest.raises(FunctionLookupError):
        ctl.enter(missing_property)
def test_call_tree_local_sub_attr_alone(ctl):
    """Entering ``_.str`` on its own leaves a plain ``__getattr__`` call for ``"str"``."""
    # attr alone is treated like a normal getattr
    stripped = strip_symbolic(_.str)
    call = ctl.enter(stripped)

    assert call.func == "__getattr__"
    assert call.args[1] == "str"
def test_call_tree_local_sub_attr_property(ctl):
    """Entering ``_.str.f_a`` (property access) gives a call that returns ``'x'`` for ``'x'``."""
    # sub attr gets stripped w/ property access
    stripped = strip_symbolic(_.str.f_a)
    call = ctl.enter(stripped)

    assert call('x') == 'x'
def test_call_tree_local_sub_attr_method(ctl):
    """Entering ``_.str.f_a()`` (method call) gives a call that returns ``'x'`` for ``'x'``."""
    # sub attr gets stripped w/ method call
    stripped = strip_symbolic(_.str.f_a())
    call = ctl.enter(stripped)

    assert call('x') == 'x'
def test_op_vars_slice():
    """``op_vars`` reports names used inside slice bounds as well as the sliced name."""
    stripped = strip_symbolic(_.a[_.b:_.c])

    assert stripped.op_vars() == {'a', 'b', 'c'}
def test_call_tree_local_sub_attr_method_missing(_, ctl):
    """Entering ``_.str.f_b()`` (method call) raises ``FunctionLookupError``."""
    # subattr raises lookup errors (method)
    missing_method = strip_symbolic(_.str.f_b())

    with pytest.raises(FunctionLookupError):
        ctl.enter(missing_method)
# to_xarray # to_hdf # to_sql # to_msgpack # to_json # to_dense # to_string # to_clipboard # to_latex } from siuba.spec.utils import get_type_info import itertools funcs_stripped = { section_name: {k: strip_symbolic(v) for k, v in section.items()} for section_name, section in funcs.items() } all_funcs = dict(itertools.chain(*[x.items() for x in funcs_stripped.values()])) # Get spec ===== nested_spec = {} for category, call_dict in funcs_stripped.items(): nested_spec[category] = d = {} for name, call in call_dict.items(): d[name] = get_type_info(call) d[name]['category'] = category
def _case_when(__data, cases):
    """Build a lazy ``case_when`` call from a dict of cases.

    Keys and values of ``cases`` are passed through ``strip_symbolic`` and
    wrapped in a ``Lazy`` ``DictCall``, which is handed to
    ``create_sym_call`` together with ``case_when`` and ``__data``.

    Raises:
        Exception: if ``cases`` is not a ``dict``.
    """
    if not isinstance(cases, dict):
        raise Exception("Cases must be a dictionary")

    dict_entries = {
        strip_symbolic(cond): strip_symbolic(val) for cond, val in cases.items()
    }
    cases_arg = Lazy(DictCall("__call__", dict, dict_entries))

    return create_sym_call(case_when, __data, cases_arg)
def get_df_expr(entry):
    """Return the entry's frame expression as ``(source string, stripped call)``.

    The string is ``str(entry['expr_frame'])``; the call is that string
    evaluated with ``_`` in scope and passed through ``strip_symbolic``.
    """
    str_expr = str(entry['expr_frame'])
    return str_expr, strip_symbolic(eval(str_expr, {'_': _}))