Beispiel #1
0
    def default(self, obj):
        """Serialize ``Context`` and ``Binding`` objects as ordered mappings.

        The listed attributes are read from ``obj`` in a fixed order, then
        passed through the filtering/sorting helpers below before being
        collected into an ``OrderedDict``.  Any other object is delegated to
        the parent class' ``default``.
        """
        def attr_pair(attr):
            # (name, value) pair for a single attribute of ``obj``.
            return (attr, getattr(obj, attr))

        drop_none = partial(remove_values, isnone)
        drop_empty_containers = partial(
            remove_values, all_fn(isa(list, dict), isempty))
        sort_nested_dicts = partial(walk_values, iffy(isa(dict), sort_dict))

        serialize = pipe(
            partial(map, attr_pair),
            drop_none,
            drop_empty_containers,
            sort_nested_dicts,
            OrderedDict)

        if isinstance(obj, Context):
            return serialize(['key', 'operator', 'operand', 'match_all'])
        if isinstance(obj, Binding):
            return serialize(['keys', 'command', 'args', 'context'])
        return super().default(obj)
Beispiel #2
0
    def as_json(self):
        """Convert this node's children into a JSON-friendly container.

        When every child has an ``int`` name the result is a list of the
        children's ``as_json()`` values; otherwise it is a dict keyed by
        child name.
        """
        def has_int_name(node):
            return isinstance(node.name, int)

        if not all(has_int_name, self.children):
            return {node.name: node.as_json() for node in self.children}
        return [node.as_json() for node in self.children]
Beispiel #3
0
 def is_named(self, name):
     """Return whether this node's name matches ``name``.

     Two string names are compared case-insensitively; in every other
     case both sides are converted with ``str`` and compared exactly.
     """
     both_textual = (isinstance(self.name, basestring)
                     and isinstance(name, basestring))
     if not both_textual:
         # most likely one of the arguments is an int - we get str from
         # the parser but the names (if numerical) are stored as ints
         return str(self.name) == str(name)
     return self.name.lower() == name.lower()
Beispiel #4
0
    def as_json(self):
        """Convert this node's children into a JSON-friendly container.

        Children that all carry ``int`` names produce a list of their
        ``as_json()`` values; any other mix produces a dict keyed by
        child name.
        """
        def int_named(child):
            return isinstance(child.name, int)

        if not all(int_named, self.children):
            return {child.name: child.as_json() for child in self.children}
        return [child.as_json() for child in self.children]
Beispiel #5
0
 def _preprocess(self, bindings):
     """Run ``bindings`` through the preprocessing steps, in order.

     The input is deep-copied, flattened (descending into lists, tuples
     and ``Keymap`` instances), and then handed to
     ``_apply_common_context`` followed by ``_apply_default_match_all``.
     """
     flatten_nested = partial(lflatten, follow=isa(list, tuple, Keymap))
     prepare = pipe(
         deepcopy,
         flatten_nested,
         self._apply_common_context,
         self._apply_default_match_all
     )
     return prepare(bindings)
Beispiel #6
0
 def _has_correct_input_type(self, feature):
     '''Check that `input` is a string or iterable of string

     Raises AssertionError when ``feature.input`` is neither a ``str``
     nor an iterable whose items are all ``str``.
     '''
     value = feature.input
     if isinstance(value, str):
         return
     # Iterability is tested first so non-iterables are never iterated.
     assert funcy.iterable(value) and all(
         isinstance(item, str) for item in value)
Beispiel #7
0
 def is_named(self, name):
     """Tell whether ``name`` refers to this node.

     String names match case-insensitively; if either side is not a
     string, the ``str`` forms of both are compared exactly.
     """
     if isinstance(self.name, basestring) and isinstance(name, basestring):
         return self.name.lower() == name.lower()
     # most likely one of the arguments is an int - we get str from
     # the parser but the names (if numerical) are stored as ints
     return str(self.name) == str(name)
Beispiel #8
0
def test_df_colnames(input, transformer, output):
    """Transformed column names reported by the pipeline are all strings.

    A pipeline holding a single feature is fitted on a 5x2 frame with
    columns ``foo`` and ``bar``; the result is wrapped in a DataFrame
    labelled with ``transformed_names_`` and the labels are checked.
    """
    pipeline = FeatureEngineeringPipeline(
        Feature(input, transformer, output=output))

    # NOTE(review): pd.util.testing is deprecated in recent pandas
    # releases - confirm the pandas version this suite targets.
    entities = pd.util.testing.makeCustomDataframe(5, 2)
    entities.columns = ['foo', 'bar']

    transformed = pipeline.fit_transform(entities)
    result = pd.DataFrame(
        transformed,
        columns=pipeline.transformed_names_,
        index=entities.index,
    )
    assert fy.all(fy.isa(str), result.columns)
Beispiel #9
0
 def values(self, *fields, **expressions):
     """
     Extended version supporting renames:
         .values('id', 'name', author__name='author')

     Keyword arguments whose value is a string are treated as renames.
     Without any, the call is passed straight to ``base.values``.  On
     Django >= 1.11 each rename value is wrapped in ``F`` and passed as
     an expression; on older versions the renamed fields are selected
     positionally and the result keys are rewritten via ``.map``.
     """
     renames = select_values(isa(six.string_types), expressions)
     if not renames:
         return base.values(self, *fields, **expressions)

     if django.VERSION >= (1, 11):
         expressions.update(walk_values(F, renames))
         return base.values(self, *fields, **expressions)

     f_to_name = flip(renames)

     def rename(row):
         # Keys without a rename entry are kept as they are.
         return {f_to_name.get(k, k): v for k, v in row.items()}

     return base.values(self, *chain(fields, f_to_name)).map(rename)
Beispiel #10
0
 def assertPandasObjectNotEqual(self, first, second, msg=None, **kwargs):
     """Assert that two objects are not equal pandas objects.

     Objects that are not both pandas objects, or whose types differ,
     are accepted without further checks.  Otherwise the comparison is
     delegated to the frame/series/index specific assertion.
     """
     is_pdobj = funcy.isa(pd.core.base.PandasObject)
     if not (is_pdobj(first) and is_pdobj(second)):
         # it's great that they are uncomparable types :)
         return
     if not isinstance(first, type(second)):
         return

     if isinstance(first, pd.DataFrame):
         self.assertFrameNotEqual(first, second, msg=msg, **kwargs)
     elif isinstance(first, pd.Series):
         self.assertSeriesNotEqual(first, second, msg=msg, **kwargs)
     elif isinstance(first, pd.Index):
         self.assertIndexNotEqual(first, second, msg=msg, **kwargs)
     else:
         # unreachable?
         raise AssertionError('you found a bug: unreachable code')
Beispiel #11
0
 def assertPandasObjectEqual(self, first, second, msg=None, **kwargs):
     """Assert that two pandas objects are equal.

     Both arguments must be pandas objects and ``first`` must be an
     instance of ``type(second)``; the comparison is then delegated to
     the frame/series/index specific assertion, forwarding ``msg`` and
     ``kwargs``.

     Raises ``self.failureException`` when the objects cannot be
     compared.  This includes two pandas objects of different types
     (e.g. a DataFrame and a Series), which previously fell through
     every branch and passed silently.
     """
     pandas_base = pd.core.base.PandasObject
     comparable = (
         isinstance(first, pandas_base)
         and isinstance(second, pandas_base)
         and isinstance(first, type(second))
     )
     if not comparable:
         standardMsg = '{} and {} are uncomparable types'.format(
             *_common_shorten_repr(first, second))
         msg = self._formatMessage(msg, standardMsg)
         raise self.failureException(msg)

     if isinstance(first, pd.DataFrame):
         self.assertFrameEqual(first, second, msg=msg, **kwargs)
     elif isinstance(first, pd.Series):
         self.assertSeriesEqual(first, second, msg=msg, **kwargs)
     elif isinstance(first, pd.Index):
         self.assertIndexEqual(first, second, msg=msg, **kwargs)
     else:
         # unreachable?
         raise AssertionError('you found a bug: unreachable code')
Beispiel #12
0
    def get_data_frame(document_id: str,
                       raw: bool = False,
                       auth_args: Auth = Auth.shared()):
        """Download a document's block file and load it as a DataFrame.

        The file id is the last path segment of the attachment URL on the
        content entry whose format code is ``"ocr-text-file-id"``.

        Parameters
        ----------
        document_id : str
            Identifier passed to ``Document.get``.
        raw : bool
            When true, return the frame exactly as parsed from the file.
        auth_args : Auth
            Credentials used for the document lookup and file download.

        Returns
        -------
        The parsed frame when ``raw`` is true or the frame is empty;
        otherwise the frame with ``Geometry`` expanded into columns,
        indexed by ``Id`` and passed through ``Block.sort``.

        Raises
        ------
        ValueError
            If no block file id can be found for the document.
        """
        auth = Auth(auth_args)
        document = Document.get(document_id, auth_args=auth_args)

        file_id = pipe(
            document.get("content", []),
            c.filter(lambda c: c.get("format", {}).get("code") ==
                     "ocr-text-file-id"),
            c.first,
            c.get("attachment", default={}),
            c.get("url"),
            iffy(isa(str), lambda url: url.split("/")[-1]),
        )

        if file_id is None:
            raise ValueError(
                f"No block file found for document: '{document_id}'")

        files = Files(auth.session())
        filename = files.download(file_id, "/tmp/")

        # Always remove the downloaded file, even if parsing fails, so a
        # malformed block file does not leave a stray copy behind.
        try:
            frame = pd.read_json(filename, lines=True)
        finally:
            os.remove(filename)

        if raw or len(frame) == 0:
            return frame

        return Block.sort(
            frame.drop(["Geometry"], axis=1).join(
                pd.json_normalize(frame.Geometry)).pipe(
                    partial(
                        Frame.expand,
                        custom_columns=[
                            Frame.codeable_like_column_expander("Polygon")
                        ],
                    )).set_index("Id"))
Beispiel #13
0
        def __call__(self, func: Callable):
            """Build a ``pytest.mark.parametrize`` mark from ``func``'s doctests.

            Each doctest example found in ``func.__doc__`` becomes one
            parameter named ``doctest``: a zero-argument callable that runs
            the example against the shared namespace ``self.ns`` and returns
            ``''`` on success or the runner's report (minus its first line)
            on failure.
            """
            mod = sys.modules[func.__module__]

            parsed = dt.DocTestParser().parse(str(func.__doc__), func.__name__)
            # The parser output mixes plain text with Example objects.
            examples = [part for part in parsed if isinstance(part, dt.Example)]

            def scenario(atest):
                def outcome():
                    runner = dt.DocTestRunner()
                    out = io.StringIO()
                    # Share state between examples through self.ns.
                    atest.globs.update(self.ns)
                    res = runner.run(atest, out=out.write, clear_globs=False)
                    self.ns.update(atest.globs)
                    if res.failed:  # pragma: nocov
                        report_lines = out.getvalue().split(os.linesep)
                        return os.linesep.join(report_lines[1:])
                    return ''

                return outcome

            def maybe_lineof(obj):
                code = getattr(obj, '__code__', False)
                return code.co_firstlineno if code else -1

            def collect():
                self.ns.update(mod.__dict__)
                for example in examples:
                    yield scenario(
                        dt.DocTest([example], self.ns, mod.__name__,
                                   mod.__file__, maybe_lineof(func),
                                   str(func.__doc__)))

            return pytest.mark.parametrize("doctest", collect())
Beispiel #14
0
def assert_series_not_equal(first, second, **kwargs):
    """Test that Series first and second are not equal"""
    # Hands pandas' assert_series_equal to the _assert_pandas_not_equal
    # helper (defined outside this excerpt); kwargs are forwarded unchanged.
    _assert_pandas_not_equal(pdt.assert_series_equal, first, second, **kwargs)


def assert_index_equal(first, second, **kwargs):
    """Test that Index first and second are equal"""
    # Hands pandas' assert_index_equal to the _assert_pandas_equal helper
    # (defined outside this excerpt); kwargs are forwarded unchanged.
    _assert_pandas_equal(pdt.assert_index_equal, first, second, **kwargs)


def assert_index_not_equal(first, second, **kwargs):
    """Test that Index first and second are not equal"""
    # Hands pandas' assert_index_equal to the _assert_pandas_not_equal
    # helper (defined outside this excerpt); kwargs are forwarded unchanged.
    _assert_pandas_not_equal(pdt.assert_index_equal, first, second, **kwargs)


# Predicate: True when its argument is an instance of pandas' PandasObject.
_is_pdobj = funcy.isa(pd.core.base.PandasObject)


def assert_pandas_object_equal(first, second, **kwargs):
    """Test that arbitrary Pandas objects first and second are equal"""
    # Dispatch to the type-specific assertion based on the type of `first`.
    # NOTE(review): as visible here, nothing is raised when the arguments
    # are not both pandas objects or when `first` is not an instance of
    # type(second) - confirm whether a failure branch is missing.
    if _is_pdobj(first) and _is_pdobj(second):
        if isinstance(first, type(second)):
            if isinstance(first, pd.DataFrame):
                assert_frame_equal(first, second, **kwargs)
            elif isinstance(first, pd.Series):
                assert_series_equal(first, second, **kwargs)
            elif isinstance(first, pd.Index):
                assert_index_equal(first, second, **kwargs)
            else:
                # unreachable?
                raise AssertionError('you found a bug: unreachable code')
Beispiel #15
0
        return "Branch(%s, #kids:%d)" % (self.name, len(self.children))


class Leaf(Node):
    """Node that carries a value (``self.val``) and has no children."""

    def as_json(self):
        """Return the stored value unchanged."""
        return self.val

    @property
    def children(self):
        """Return a new, empty ``NodeSet`` on every access."""
        return NodeSet()

    def __repr__(self):
        return "Leaf(%s, %s)" % (self.name, self.val)


# Single-argument predicates: isinstance checks against Branch / Leaf.
is_branch = isa(Branch)
is_leaf   = isa(Leaf)



class NodeSet(list):
    @classmethod
    def from_seq(cls, seq):
        return cls(*seq)

    def __init__(self, *nodes):
        super(NodeSet, self).__init__(nodes)

    def append_or_extend(self, obj):
        if is_single(obj):
            self.append(obj)
Beispiel #16
0
import copy
from functools import singledispatch
from pysistence import Expando
from typing import Any, Mapping, List, Tuple, Iterable, Generator, Callable, Union
import inspect
from . import pipelib
import itertools

from functools import wraps, update_wrapper

import operator as ops

# Sets of built-in types, grouped by the category their name describes.
PRIMTYPES = {int, bool, float, str, set, list, tuple, dict}
LISTLIKE = {set, list, tuple}

# Single-argument isinstance predicates built with funcy.isa.
textual = funcy.isa(str)
numeric = funcy.isa(numbers.Number)
isint = funcy.isa(int)  # note: bool is a subclass of int
isdict = funcy.isa(dict)
isgen = funcy.isa(types.GeneratorType)


def unfold_gen(x: Generator[Any, None, None],
               cast: type = tuple) -> Iterable[Any]:
    """Flatten a possibly nested generator and collect the items.

    ``funcy.flatten`` does the recursive unrolling; only generator
    objects are descended into, every other item is yielded as is.  The
    flattened items are passed to ``cast`` (``tuple`` by default) and its
    result is returned.
    """
    flattened = funcy.flatten(x, follow=isgen)
    return cast(flattened)

Beispiel #17
0
 def check(self, feature):
     """Check that the feature's `input` is a str or Iterable[str]

     Raises AssertionError when ``feature.input`` is neither a ``str``
     nor an iterable whose items are all ``str``.
     """
     def is_str(candidate):
         return isinstance(candidate, str)

     value = feature.input
     if is_str(value):
         return
     # Iterability is tested first so non-iterables are never iterated.
     assert iterable(value) and all(is_str, value)
Beispiel #18
0
def select(typ: type, top: ast.AST) -> Generator[ast.AST, None, None]:
    """Lazily yield every node in the tree under ``top`` of type ``typ``.

    Args:
        typ: The AST node class to look for (anything ``isinstance``
            accepts as its second argument works, e.g. ``ast.Name``).
        top: Root of the tree to search; it is included in the search.

    Returns:
        A generator over the matching nodes, in ``ast.walk`` order.
    """
    return (node for node in ast.walk(top) if isinstance(node, typ))
Beispiel #19
0
        return "Branch(%s, #kids:%d)" % (self.name, len(self.children))


class Leaf(Node):
    """Node that carries a value (``self.val``) and has no children."""

    def as_json(self):
        """Return the stored value unchanged."""
        return self.val

    @property
    def children(self):
        """Return a new, empty ``NodeSet`` on every access."""
        return NodeSet()

    def __repr__(self):
        return "Leaf(%s, %s)" % (self.name, self.val)


# Single-argument predicates: isinstance checks against Branch / Leaf.
is_branch = isa(Branch)
is_leaf = isa(Leaf)


class NodeSet(list):
    @classmethod
    def from_seq(cls, seq):
        return cls(*seq)

    def __init__(self, *nodes):
        super(NodeSet, self).__init__(nodes)

    def append_or_extend(self, obj):
        if is_single(obj):
            self.append(obj)
        else:
Beispiel #20
0
from inspect import Parameter

# Sets of built-in types, grouped by the category their name describes.
PRIMTYPES = {int, bool, float, str, set, list, tuple, dict, bytes}
LISTLIKE = {set, list, tuple}
TEXTLIKE = {str, bytes}
MUTABLE = {list, set, dict}

# Type aliases for annotations, mirroring the sets above.
Primitive = Union[int, bool, float, str, set, list, tuple, dict, bytes]
Listlike = Union[set, list, tuple]
Singular = Union[Primitive, Callable]
Plural = Union[set, list, tuple, dict]
Dualism = Union[Singular, Plural]

# NOTE(review): declares a callable taking (Any, None, None) - confirm
# that the two ``None`` parameter types are intended.
PipeCombineFn = Callable[[Any, None, None], Any]

# Single-argument isinstance predicates built with fy.isa.
textual = fy.isa(*TEXTLIKE)  # str or bytes
numeric = fy.isa(numbers.Number)
isint = fy.isa(int)  # note: bool is a subclass of int
isdict = fy.isa(dict)
isgen = fy.isa(types.GeneratorType)
# Curried operator.eq: iseq(a)(b) evaluates a == b.
iseq = fy.curry(operator.eq)


def unbox(x: Any) -> Singular:
    """
    >>> unbox(1)
    1
    >>> unbox((1,))
    1
    >>> unbox((1,2))
    (1, 2)
Beispiel #21
0
    @property
    def value(self) -> Any:
        "Return a list with the ``value`` of every node in ``self.body``."
        return [child.value for child in self.body]

    @property
    def primval(self) -> Any:
        "Always raise NotImplementedError: no primitive value is defined here."
        raise NotImplementedError("Branch nodes doesn't have primitive values")

    @property
    def children(self) -> List[Node]:
        "Return ``simplify(desc.full)`` for each entry of ``self.desc.children``."
        return [simplify(desc.full) for desc in self.desc.children]


# Single-argument predicate: isinstance check against Branch.
isbranch = fy.isa(Branch)


class Leaf(AbstractNode, ABC):
    "Marker for leaf nodes"

    @property
    def value(self) -> Any:
        """Value property - for leaf nodes that means the Python literal for
        literals, the name as a String for Symbols.

        Reads ``self.desc.value``."""
        return self.desc.value

    @value.setter
    def value(self, value: Any) -> None:
        # Writes through to the underlying descriptor object.
        self.desc.value = value
Beispiel #22
0
        transformer=identity,
        render_strategy=render_ftemplate,
    ):
        super().__init__(value, grammar_path, context, transformer=transformer)
        self._render = render_strategy

    def __str__(self):
        """Return this object rendered with its own ``self.context``."""
        return self.render(self.context)

    def render(self, context=None):
        """Render via the configured strategy and wrap in ``RenderedStr``.

        The parent class' ``render()`` output is passed to ``self._render``
        together with ``self.grammar_path`` and the context; a falsy
        ``context`` falls back to ``self.context``.
        """
        rendered = self._render(
            super().render(),
            self.grammar_path,
            context or self.context,
        )
        return RenderedStr(rendered)


# Single-argument predicate: isinstance check against Text.
is_text = isa(Text)


class RenderedStr(str):
    vowels = set("aeiou")

    @cached_property
    def an(self):
        if self[0].lower() in self.vowels:
            return "an"
        else:
            return "a"

    a = an

    @cached_property
Beispiel #23
0
import textwrap

from funcy import is_list, is_mapping, isa

from . import db, texts

# Single-argument predicate: isinstance check against bool.
is_bool = isa(bool)


def parse_grammar_file(storage, grammar_path, context, included=None):
    """Parse the grammar at ``grammar_path`` into ``context``.

    ``included`` is the set of grammar paths already visited.  A path that
    is already in the set is not parsed again and ``context`` is returned
    unchanged; otherwise the path is added to the set (mutating the
    caller's set when one was passed in).

    The document's ``include`` entry is handled first, then its ``render``
    entry (default ``"ftemplate"``) selects the render strategy, and the
    remaining document is passed to ``parse_data``.
    """
    if included is None:
        included = {grammar_path}
    elif grammar_path in included:
        # Already visited: nothing more to do for this grammar.
        return context
    else:
        included.add(grammar_path)

    doc = storage.resolve_module(grammar_path)

    includes = doc.pop("include", ())
    context = parse_includes(storage, grammar_path, includes, context,
                             included)

    strategy_name = doc.pop("render", "ftemplate")
    render_strategy = parse_render_strategy(strategy_name, grammar_path)

    return parse_data(doc,
                      grammar_path,
                      context,
                      render_strategy=render_strategy)

Beispiel #24
0
def is_map_or_seq(data):
    """Return True for mappings and sequences, excluding ``str``.

    Only ``str`` is excluded explicitly; other ``Sequence`` types such as
    ``bytes`` still count.
    """
    if isinstance(data, str):
        return False
    return isinstance(data, (Mapping, Sequence))
Beispiel #25
0
 def map_types(self, types, func):
     """Map ``func`` over the values that are instances of ``types``.

     Each item handed to ``self.map`` is walked with ``walk_values``;
     values matching ``isa(types)`` are replaced by ``func(value)`` and
     all other values are left untouched.
     """
     convert_matching = iffy(isa(types), func)
     walk_item = partial(walk_values, convert_matching)
     return self.map(walk_item)