def default(self, obj):
    """Serialize ``Context`` and ``Binding`` objects as ordered mappings.

    Any other object is handed to the parent class's ``default``.
    """
    # Stages run in order on a list of attribute names:
    #   names -> (name, value) pairs -> pairs whose value passes `isnone`
    #   removed -> pairs whose value is a list/dict passing `isempty`
    #   removed -> dict values passed through `sort_dict` -> OrderedDict.
    to_pairs = partial(map, lambda attr: (attr, getattr(obj, attr)))
    drop_none = partial(remove_values, isnone)
    drop_empty_containers = partial(
        remove_values, all_fn(isa(list, dict), isempty))
    sort_nested_dicts = partial(walk_values, iffy(isa(dict), sort_dict))
    ordered_attrs = pipe(
        to_pairs,
        drop_none,
        drop_empty_containers,
        sort_nested_dicts,
        OrderedDict)

    if isinstance(obj, Context):
        attr_names = ['key', 'operator', 'operand', 'match_all']
    elif isinstance(obj, Binding):
        attr_names = ['keys', 'command', 'args', 'context']
    else:
        return super().default(obj)
    return ordered_attrs(attr_names)
def as_json(self):
    """Return the children as a JSON-ready list or dict.

    A list is produced when every child's ``name`` is an ``int``;
    otherwise a dict keyed by child name is produced.
    """
    has_int_name = compose(isa(int), attrgetter("name"))
    if not all(has_int_name, self.children):
        return {child.name: child.as_json() for child in self.children}
    return [child.as_json() for child in self.children]
def is_named(self, name):
    """Tell whether this node's name matches ``name``.

    Two strings are compared case-insensitively; anything else is
    compared through ``str()``.
    """
    both_strings = all(isa(basestring), [self.name, name])
    if not both_strings:
        # most likely one of the arguments is an int - we get str from
        # the parser but the names (if numerical) are stored as ints
        return str(self.name) == str(name)
    return self.name.lower() == name.lower()
def as_json(self):
    """Return the children as a JSON-ready list or dict.

    A list is produced when every child's ``name`` is an ``int``;
    otherwise a dict keyed by child name is produced.
    """
    has_int_name = compose(isa(int), attrgetter("name"))
    if not all(has_int_name, self.children):
        return {child.name: child.as_json() for child in self.children}
    return [child.as_json() for child in self.children]
def _preprocess(self, bindings):
    """Run ``bindings`` through the preprocessing pipeline.

    The input is deep-copied, nested lists/tuples/``Keymap``s are
    flattened, and the common-context and default-``match_all`` steps are
    applied, in that order.
    """
    flatten_nested = partial(lflatten, follow=isa(list, tuple, Keymap))
    stages = (
        deepcopy,
        flatten_nested,
        self._apply_common_context,
        self._apply_default_match_all,
    )
    return pipe(*stages)(bindings)
def _has_correct_input_type(self, feature):
    '''Check that `input` is a string or iterable of string'''
    is_str = funcy.isa(str)

    def _only_strings(items):
        return all(map(is_str, items))

    # all_fn short-circuits, so _only_strings is only reached for iterables.
    is_nested_str = funcy.all_fn(funcy.iterable, _only_strings)

    candidate = feature.input
    assert is_str(candidate) or is_nested_str(candidate)
def test_df_colnames(input, transformer, output):
    """Check that every column name of the transformed frame is a ``str``.

    A pipeline is built from a single feature, fitted on a small 5x2 frame
    with columns ``foo`` and ``bar``, and the result is wrapped in a
    DataFrame using the pipeline's ``transformed_names_``.
    """
    feature = Feature(input, transformer, output=output)
    mapper = FeatureEngineeringPipeline(feature)

    # Built by hand instead of pd.util.testing.makeCustomDataframe(5, 2):
    # the pandas.util.testing module is no longer available in current
    # pandas. The cell and index labels follow the pattern that helper
    # generated, so transformers see the same kind of (string) data.
    n_rows = 5
    column_names = ['foo', 'bar']
    entities_df = pd.DataFrame(
        [
            ['R{}C{}'.format(row, col) for col in range(len(column_names))]
            for row in range(n_rows)
        ],
        index=pd.Index(
            ['R_l0_g{}'.format(row) for row in range(n_rows)], name='R0'),
        columns=column_names,
    )

    feature_matrix = mapper.fit_transform(entities_df)
    feature_frame = pd.DataFrame(
        feature_matrix,
        columns=mapper.transformed_names_,
        index=entities_df.index,
    )
    assert fy.all(fy.isa(str), feature_frame.columns)
def values(self, *fields, **expressions):
    """
    Extended version supporting renames:
        .values('id', 'name', author__name='author')

    Keyword arguments whose value is a string are treated as renames;
    without any, the call is passed straight to ``base.values``.
    """
    renames = select_values(isa(six.string_types), expressions)
    if not renames:
        return base.values(self, *fields, **expressions)

    if django.VERSION >= (1, 11):
        # Replace each string value with an F() expression over it.
        expressions.update(walk_values(F, renames))
        return base.values(self, *fields, **expressions)

    # Older Django: fetch the source fields, then rename keys per row.
    f_to_name = flip(renames)

    def rename(row):
        return {f_to_name.get(key, key): val for key, val in row.items()}

    return base.values(self, *chain(fields, f_to_name)).map(rename)
def assertPandasObjectNotEqual(self, first, second, msg=None, **kwargs):
    """Assert that two pandas objects are not equal.

    The check is dispatched to the frame, series or index assertion that
    matches ``first``. Nothing is asserted when either argument is not a
    ``PandasObject`` or when ``first`` is not an instance of
    ``type(second)``.
    """
    is_pdobj = funcy.isa(pd.core.base.PandasObject)
    if not (is_pdobj(first) and is_pdobj(second)):
        return

    if not isinstance(first, type(second)):
        # it's great that they are uncomparable types :)
        return

    if isinstance(first, pd.DataFrame):
        self.assertFrameNotEqual(first, second, msg=msg, **kwargs)
    elif isinstance(first, pd.Series):
        self.assertSeriesNotEqual(first, second, msg=msg, **kwargs)
    elif isinstance(first, pd.Index):
        self.assertIndexNotEqual(first, second, msg=msg, **kwargs)
    else:
        # unreachable?
        raise AssertionError('you found a bug: unreachable code')
def assertPandasObjectEqual(self, first, second, msg=None, **kwargs):
    """Assert that two pandas objects are equal.

    The check is dispatched to the frame, series or index assertion that
    matches ``first``. ``self.failureException`` is raised when ``first``
    is not an instance of ``type(second)``. Nothing is asserted when
    either argument is not a ``PandasObject``.
    """
    is_pdobj = funcy.isa(pd.core.base.PandasObject)
    if not (is_pdobj(first) and is_pdobj(second)):
        return

    if not isinstance(first, type(second)):
        standardMsg = '{} and {} are uncomparable types'.format(
            *_common_shorten_repr(first, second))
        raise self.failureException(self._formatMessage(msg, standardMsg))

    if isinstance(first, pd.DataFrame):
        self.assertFrameEqual(first, second, msg=msg, **kwargs)
    elif isinstance(first, pd.Series):
        self.assertSeriesEqual(first, second, msg=msg, **kwargs)
    elif isinstance(first, pd.Index):
        self.assertIndexEqual(first, second, msg=msg, **kwargs)
    else:
        # unreachable?
        raise AssertionError('you found a bug: unreachable code')
def get_data_frame(document_id: str, raw: bool = False, auth_args: Auth = Auth.shared()):
    """Load the OCR block file attached to a document as a DataFrame.

    Args:
        document_id: Id of the document to look up.
        raw: When true, return the frame exactly as read from the file.
        auth_args: Authentication used for the document and file requests.

    Returns:
        The raw frame when ``raw`` is true or the file has no rows;
        otherwise the frame with its ``Geometry`` column expanded, indexed
        by ``Id`` and passed through ``Block.sort``.

    Raises:
        ValueError: If no file id could be derived from the document.
    """
    auth = Auth(auth_args)
    document = Document.get(document_id, auth_args=auth_args)

    # The file id is the last path segment of the attachment URL on the
    # first content entry whose format code is "ocr-text-file-id".
    file_id = pipe(
        document.get("content", []),
        c.filter(lambda entry: entry.get("format", {}).get("code") ==
                 "ocr-text-file-id"),
        c.first,
        c.get("attachment", default={}),
        c.get("url"),
        iffy(isa(str), lambda url: url.split("/")[-1]),
    )

    if file_id is None:
        raise ValueError(
            f"No block file found for document: '{document_id}'")

    files = Files(auth.session())
    filename = files.download(file_id, "/tmp/")
    try:
        frame = pd.read_json(filename, lines=True)
    finally:
        # Remove the downloaded file even when parsing fails.
        os.remove(filename)

    if raw or len(frame) == 0:
        return frame

    return Block.sort(
        frame.drop(["Geometry"], axis=1).join(
            pd.json_normalize(frame.Geometry)).pipe(
                partial(
                    Frame.expand,
                    custom_columns=[
                        Frame.codeable_like_column_expander("Polygon")
                    ],
                )).set_index("Id"))
def __call__(self, func: Callable):
    """Build a ``doctest`` parametrization from ``func``'s docstring.

    Each doctest example found in the docstring becomes one parameter: a
    zero-argument callable that runs the example and returns ``''`` on
    success or the runner's output (minus its first line) on failure.
    Globals are shared between examples through ``self.ns``.
    """
    mod = sys.modules[func.__module__]
    parsed = dt.DocTestParser().parse(str(func.__doc__), func.__name__)
    # parse() yields both text and Example objects; keep only the examples.
    examples = fy.select(fy.isa(dt.Example), parsed)

    def maybe_lineof(obj):
        code = getattr(obj, '__code__', False)
        return code.co_firstlineno if code else -1

    def scenario(atest):
        def outcome():
            runner, out = dt.DocTestRunner(), io.StringIO()
            # Carry the shared namespace into the example and back out.
            atest.globs.update(self.ns)
            res = runner.run(atest, out=out.write, clear_globs=False)
            self.ns.update(atest.globs)
            if res.failed == 0:
                return ''
            else:  # pragma: nocov
                report_lines = out.getvalue().split(os.linesep)
                return os.linesep.join(report_lines[1:])
        return outcome

    def collect():
        # Generator: the namespace is seeded only once iteration starts.
        self.ns.update(mod.__dict__)
        for example in examples:
            atest = dt.DocTest([example], self.ns, mod.__name__,
                               mod.__file__, maybe_lineof(func),
                               str(func.__doc__))
            yield scenario(atest)

    return pytest.mark.parametrize("doctest", collect())
def assert_series_not_equal(first, second, **kwargs):
    """Test that Series first and second are not equal"""
    _assert_pandas_not_equal(pdt.assert_series_equal, first, second, **kwargs)


def assert_index_equal(first, second, **kwargs):
    """Test that Index first and second are equal"""
    _assert_pandas_equal(pdt.assert_index_equal, first, second, **kwargs)


def assert_index_not_equal(first, second, **kwargs):
    """Test that Index first and second are not equal"""
    _assert_pandas_not_equal(pdt.assert_index_equal, first, second, **kwargs)


# Predicate: true for any instance of pandas' PandasObject base class.
_is_pdobj = funcy.isa(pd.core.base.PandasObject)


def assert_pandas_object_equal(first, second, **kwargs):
    """Test that arbitrary Pandas objects first and second are equal"""
    # NOTE(review): as visible here, nothing is asserted when either
    # argument is not a PandasObject or when `first` is not an instance of
    # type(second) -- confirm that silent pass is intended.
    if _is_pdobj(first) and _is_pdobj(second):
        if isinstance(first, type(second)):
            # Dispatch on the concrete kind of `first`.
            if isinstance(first, pd.DataFrame):
                assert_frame_equal(first, second, **kwargs)
            elif isinstance(first, pd.Series):
                assert_series_equal(first, second, **kwargs)
            elif isinstance(first, pd.Index):
                assert_index_equal(first, second, **kwargs)
            else:
                # unreachable?
                raise AssertionError('you found a bug: unreachable code')
return "Branch(%s, #kids:%d)" % (self.name, len(self.children)) class Leaf(Node): def as_json(self): return self.val @property def children(self): return NodeSet() def __repr__(self): return "Leaf(%s, %s)" % (self.name, self.val) is_branch = isa(Branch) is_leaf = isa(Leaf) class NodeSet(list): @classmethod def from_seq(cls, seq): return cls(*seq) def __init__(self, *nodes): super(NodeSet, self).__init__(nodes) def append_or_extend(self, obj): if is_single(obj): self.append(obj)
import copy
from functools import singledispatch
from pysistence import Expando
from typing import Any, Mapping, List, Tuple, Iterable, Generator, Callable, Union
import inspect
from . import pipelib
import itertools
from functools import wraps, update_wrapper
import operator as ops

PRIMTYPES = {int, bool, float, str, set, list, tuple, dict}
LISTLIKE = {set, list, tuple}

# One-argument predicates built with funcy.isa: each returns whether its
# argument is an instance of the given type.
textual = funcy.isa(str)
numeric = funcy.isa(numbers.Number)
isint = funcy.isa(int)
isdict = funcy.isa(dict)
isgen = funcy.isa(types.GeneratorType)


def unfold_gen(x: Generator[Any, None, None], cast: type = tuple) -> Iterable[Any]:
    """Quick recursive unroll of possibly nested generators
    (uses funcy library under the hood).

    Only generator objects are descended into (``isgen`` is the follow
    predicate passed to ``funcy.flatten``); the flattened items are then
    passed to ``cast``, which defaults to ``tuple``.
    """
    return cast(funcy.flatten(x, isgen))
def check(self, feature):
    """Check that the feature's `input` is a str or Iterable[str]"""
    is_str = isa(str)

    def _only_strings(items):
        # `all` is called as (predicate, sequence) here, i.e. the
        # funcy-style signature rather than the builtin.
        return all(is_str, items)

    is_nested_str = all_fn(iterable, _only_strings)

    candidate = feature.input
    assert is_str(candidate) or is_nested_str(candidate)
def select(typ: type, top: ast.AST) -> Generator[ast.AST, None, None]:
    """Lazily yield every node in the tree rooted at ``top`` of type ``typ``.

    Args:
        typ: The node class to look for (e.g. ``ast.Name``); a tuple of
            classes is accepted as well, as with ``isinstance``.
        top: Root of the tree to search. ``top`` itself is a candidate,
            because ``ast.walk`` includes the starting node.

    Returns:
        A generator over the matching nodes, in ``ast.walk`` order.
    """
    return (node for node in ast.walk(top) if isinstance(node, typ))
return "Branch(%s, #kids:%d)" % (self.name, len(self.children)) class Leaf(Node): def as_json(self): return self.val @property def children(self): return NodeSet() def __repr__(self): return "Leaf(%s, %s)" % (self.name, self.val) is_branch = isa(Branch) is_leaf = isa(Leaf) class NodeSet(list): @classmethod def from_seq(cls, seq): return cls(*seq) def __init__(self, *nodes): super(NodeSet, self).__init__(nodes) def append_or_extend(self, obj): if is_single(obj): self.append(obj) else:
from inspect import Parameter PRIMTYPES = {int, bool, float, str, set, list, tuple, dict, bytes} LISTLIKE = {set, list, tuple} TEXTLIKE = {str, bytes} MUTABLE = {list, set, dict} Primitive = Union[int, bool, float, str, set, list, tuple, dict, bytes] Listlike = Union[set, list, tuple] Singular = Union[Primitive, Callable] Plural = Union[set, list, tuple, dict] Dualism = Union[Singular, Plural] PipeCombineFn = Callable[[Any, None, None], Any] textual = fy.isa(*TEXTLIKE) numeric = fy.isa(numbers.Number) isint = fy.isa(int) isdict = fy.isa(dict) isgen = fy.isa(types.GeneratorType) iseq = fy.curry(operator.eq) def unbox(x: Any) -> Singular: """ >>> unbox(1) 1 >>> unbox((1,)) 1 >>> unbox((1,2)) (1, 2)
@property def value(self) -> Any: "Does value" return [child.value for child in self.body] @property def primval(self) -> Any: "Does primval" raise NotImplementedError("Branch nodes doesn't have primitive values") @property def children(self) -> List[Node]: return [simplify(desc.full) for desc in self.desc.children] isbranch = fy.isa(Branch) class Leaf(AbstractNode, ABC): "Marker for leaf nodes" @property def value(self) -> Any: """Value property - for leaf nodes that means the Python literal for literals, the name as a String for Symbols.""" return self.desc.value @value.setter def value(self, value: Any) -> None: self.desc.value = value
transformer=identity, render_strategy=render_ftemplate, ): super().__init__(value, grammar_path, context, transformer=transformer) self._render = render_strategy def __str__(self): return self.render(self.context) def render(self, context=None): return RenderedStr( self._render(super().render(), self.grammar_path, context or self.context)) is_text = isa(Text) class RenderedStr(str): vowels = set("aeiou") @cached_property def an(self): if self[0].lower() in self.vowels: return "an" else: return "a" a = an @cached_property
import textwrap

from funcy import is_list, is_mapping, isa

from . import db, texts

is_bool = isa(bool)


def parse_grammar_file(storage, grammar_path, context, included=None):
    """Parse one grammar module, and the modules it includes, into ``context``.

    ``included`` tracks the grammar paths already visited; a path seen
    before returns ``context`` unchanged, so each module is parsed once.
    """
    if included is None:
        included = {grammar_path}
    elif grammar_path in included:
        return context
    else:
        included.add(grammar_path)

    module_doc = storage.resolve_module(grammar_path)

    # "include" and "render" are popped so only data reaches parse_data.
    include_spec = module_doc.pop("include", ())
    context = parse_includes(storage, grammar_path, include_spec, context,
                             included)

    render_name = module_doc.pop("render", "ftemplate")
    render_strategy = parse_render_strategy(render_name, grammar_path)

    return parse_data(module_doc, grammar_path, context,
                      render_strategy=render_strategy)
def is_map_or_seq(data):
    """Return True if ``data`` is a Mapping or a non-``str`` Sequence.

    ``str`` is excluded explicitly even though it is a Sequence. Other
    sequence types, including ``bytes``, still count.
    """
    # Plain isinstance avoids rebuilding a predicate closure on every call.
    return not isinstance(data, str) and isinstance(data, (Mapping, Sequence))
def map_types(self, types, func):
    """Map over ``self``, applying ``func`` to values of the given types.

    In each mapped item, values that are instances of ``types`` are
    replaced by ``func(value)``; other values are left untouched.
    """
    convert_matching = iffy(isa(types), func)
    per_item = partial(walk_values, convert_matching)
    return self.map(per_item)