Esempio n. 1
0
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        **more_params
    ):
        """
        Raise an exception with a trace for the cause too.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; if an
               exception (or a list whose first item is an exception) is
               passed here it is used as the cause instead
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param more_params: *any more parameters (which will overwrite default_params)
        :return: does not return normally; the constructed `Except` is raised
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        # default_params is only copied into a fresh dict here, never modified
        params = dict(unwrap(default_params), **more_params)

        cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
        # +1 so this method's own frame is not reported as the caller
        trace = exceptions.extract_stack(stack_depth + 1)

        raise Except(exceptions.ERROR, template, params, cause, trace)
Esempio n. 2
0
def argparse(defs):
    """
    Build a command-line parser from one or more argument definitions,
    parse the command line, and return the parsed values.

    :param defs: one definition, or a list of them; each has a `name`
                 (one option string or a list of them), and its remaining
                 properties are handed to `add_argument` as keyword arguments
    :return: wrapped mapping of attribute name to parsed value
    """
    cli = _argparse.ArgumentParser()
    for definition in listwrap(defs):
        options = definition.copy()
        flags = options.name
        # the name(s) go in positionally, so clear them from the keyword set
        # (presumably assigning None drops the key from the wrapped dict -- confirm)
        options.name = None
        cli.add_argument(*unwrap(listwrap(flags)), **options)
    parsed = cli.parse_args()
    return wrap(dict((key, getattr(parsed, key)) for key in vars(parsed)))
Esempio n. 3
0
def value_compare(l, r, ordering=1):
    """
    Three-way comparison of two values, for sorting.

    Nulls are handled first: two nulls are equal, a null `l` against a
    non-null `r` returns `ordering`, and the reverse returns `-ordering`.
    NOTE(review): the original header said "NULL IS THE LEAST VALUE", which
    does not match those return values -- confirm which is intended.

    Lists are compared element by element, then by length (a non-list
    operand compared against a list is treated as a one-element list).
    Tuples are compared element by element only. Mappings are compared key
    by key over the sorted union of their keys.

    :param l: LHS
    :param r: RHS
    :param ordering: (-1, 0, 1) TO AFFECT SORT ORDER
    :return: negative if l < r, zero if l == r and strictly positive if
             l > r (for ordering=1; the sign is multiplied by `ordering`)
    """

    # `== None` (not `is None`) is kept on purpose; presumably the project's
    # null objects compare equal to None -- confirm
    if l == None:
        if r == None:
            return 0
        else:
            return ordering
    elif r == None:
        return - ordering

    if isinstance(l, list) or isinstance(r, list):
        # wrap both sides once, so the length comparison below also works
        # when only one of the operands is actually a list
        left = listwrap(l)
        right = listwrap(r)
        for a, b in zip(left, right):
            c = value_compare(a, b) * ordering
            if c != 0:
                return c

        # all shared positions are equal: the shorter sequence sorts first
        if len(left) < len(right):
            return - ordering
        elif len(left) > len(right):
            return ordering
        else:
            return 0
    elif isinstance(l, builtin_tuple) and isinstance(r, builtin_tuple):
        # NOTE(review): unlike lists, tuples of different length that agree
        # on their common prefix compare as equal
        for a, b in zip(l, r):
            c = value_compare(a, b) * ordering
            if c != 0:
                return c
        return 0
    elif isinstance(l, Mapping):
        if isinstance(r, Mapping):
            for k in sorted(set(l.keys()) | set(r.keys())):
                c = value_compare(l.get(k), r.get(k)) * ordering
                if c != 0:
                    return c
            return 0
        else:
            # NOTE(review): mapping-vs-other ignores `ordering`
            return 1
    elif isinstance(r, Mapping):
        return -1
    else:
        return cmp(l, r) * ordering
Esempio n. 4
0
    def __init__(self, **desc):
        """
        Set up a "range" domain, either from explicit partitions or from
        min/max/interval.

        With `partitions` given: a `key` is required, every part must have a
        value for that key, each part's `dataIndex` is set to its position,
        `min`/`max` are widened to cover all parts, and distinct parts must
        not overlap (gaps are allowed).

        Without `partitions`: `min`, `max` and `interval` are all required,
        the key becomes "min", and evenly spaced partitions are generated.

        :param desc: domain properties, forwarded to `Domain.__init__`
        """
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            # positions are compared (not objects) so a part is never tested
            # against itself, which would always look like an overlap
            for (i, p), (j, q) in itertools.product(enumerate(parts), repeat=2):
                if i != j and p.min <= q.min and q.min < p.max:
                    Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
Esempio n. 5
0
 def __getitem__(self, item):
     """
     Look `item` up by name: first among the cube's select clauses (giving
     the cube's data for that name), then among the cube's edges (giving the
     domain entry at this record's coordinate on that edge).

     :param item: name of a select clause or of an edge
     :return: the matching value, or None when no name matches
     """
     cube = self.cube
     if any(s.name == item for s in listwrap(cube.select)):
         return cube.data[item]
     for position, edge in enumerate(cube.edges):
         if edge.name != item:
             continue
         # NOTE(review): reads `partition` (singular) off the domain --
         # confirm this is the intended attribute
         return edge.domain.partition[self.coord[position]]
     return None
Esempio n. 6
0
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Build a WARNING-level `Except` and send it to `Log.note`.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; an
               exception passed here is used as the cause instead
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            default_params, cause = {}, default_params

        # `values` is refused as a template parameter name
        if "values" in more_params:
            Log.error("Can not handle a logging parameter by name `values`")

        template_params = dict(unwrap(default_params), **more_params)
        causes = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        caller_trace = exceptions.extract_stack(stack_depth + 1)

        problem = Except(exceptions.WARNING, template, template_params, causes, caller_trace)
        Log.note(
            "{{error|unicode}}",
            error=problem,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        )
Esempio n. 7
0
def is_terms(query):
    """
    Report whether `query` is "simple": it has no select clause, or every
    select clause's aggregate maps (through `aggregates`) to "none" or "count".

    :param query: query with a `select` property
    :return: True or False
    """
    select = listwrap(query.select)

    if not query.select:
        return True
    return bool(AND(aggregates[s.aggregate] in ("none", "count") for s in select))
Esempio n. 8
0
def _normalize_groupby(groupby, schema=None):
    if groupby == None:
        return None
    output = wrap([_normalize_group(e, schema=schema) for e in listwrap(groupby)])
    if any(o==None for o in output):
        Log.error("not expected")
    return output
Esempio n. 9
0
def read_settings(filename=None, defs=None):
    """
    Load the settings document.

    With `filename`: the file must exist; it is loaded through `ref.get`,
    and if `defs` are given the parsed command-line arguments are stored on
    `settings.args`.

    Without `filename`: a `--settings` option (default "./settings.json")
    is added to `defs`, the command line is parsed, the named file is loaded,
    and the parsed arguments are stored on `settings.args`.

    :param filename: path to the settings file (optional)
    :param defs: command-line argument definition(s) for `argparse` (optional)
    :return: the loaded settings
    """
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings

    # work on a copy: listwrap may hand back a view over the caller's own
    # list, and the extra definition must not leak into it
    defs = list(listwrap(defs))
    defs.append({
        "name": ["--settings", "--settings-file", "--settings_file"],
        "help": "path to JSON file with settings",
        "type": str,
        "dest": "filename",
        "default": "./settings.json",
        "required": False
    })
    args = argparse(defs)
    settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
    settings.args = args
    return settings
Esempio n. 10
0
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    One terms facet is requested per select clause; each facet's `total`
    from the response becomes a single-value Matrix in the returned Cube.
    """
    selects = listwrap(query.select)
    es_query = build_es_query(query)
    for clause in selects:
        if is_keyword(clause.value):
            facet = {
                "terms": {
                    "field": clause.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": clause.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            facet = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(clause.value, query),
                    "size": 200000
                }
            }
        es_query.facets[clause.name] = facet

    data = es09.util.post(es, es_query, query.limit)

    matricies = {
        clause.name: Matrix(value=data.hits.facets[clause.name].total)
        for clause in selects
    }

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
Esempio n. 11
0
    def select(self, select):
        """
        Project this container's rows through a select clause.

        Only clauses whose value is a plain `Variable` are supported.
        Selecting "." under the name "." returns this container unchanged.

        :param select: one select clause, or a list of them
        :return: this container, or a new `ListContainer` named
                 "from <name>"; with a list of clauses each row becomes a
                 record keyed by clause name, with a single clause each row
                 becomes the selected value
        """
        selects = listwrap(select)

        if any(not isinstance(s.value, Variable) for s in selects):
            Log.error("selecting on structure, or expressions, not supported yet")

        # the schema only carries over when the whole row (".") is selected
        new_schema = None
        if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self

        if not isinstance(select, list):
            extract = jx_expression_to_function(select.value)
            new_data = map(extract, self.data)
        else:
            getters = [(s.name, jx_expression_to_function(s.value)) for s in selects]

            def project(row):
                record = Data()
                for name, getter in getters:
                    record[name] = getter(wrap(row))
                return unwrap(record)

            new_data = map(project, self.data)

        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
Esempio n. 12
0
def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION

    Queries with exactly two edges are delegated to `_es_terms2`.

    NOTE(review): as visible here, the function fills `output` but has no
    return statement after the fill loop -- confirm nothing was cut off.
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000),
            },
            "facet_filter": simplify_esfilter(query.where),
        }

    term2Parts = packed_term.term2parts

    data = es09.util.post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR jx INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = jx.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    # deliberately best-effort: the value is dropped
                    pass
Esempio n. 13
0
 def new_instance(desc):
     """
     Rebuild an `Except` from its description, rebuilding each of its
     causes the same way.

     :param desc: object with `type`, `template`, `params`, `cause` and `trace`
     :return: the new `Except`
     """
     kind, template, params = desc.type, desc.template, desc.params
     causes = [Except.new_instance(c) for c in listwrap(desc.cause)]
     return Except(kind, template, params, causes, desc.trace)
Esempio n. 14
0
    def start(cls, settings=None):
        """
        RUN ME FIRST TO SETUP THE THREADED LOGGING
        http://victorlin.me/2012/08/good-logging-practice-in-python/

        Does nothing when `settings` is empty. Otherwise `settings` may have:

        log       - LIST OF PARAMETERS FOR LOGGER(S)
        trace     - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
        cprofile  - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                    USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
        profile   - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                    USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
        constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)

        :param settings: mapping with the properties above
        :return:
        """
        global _Thread
        if not settings:
            return
        settings = wrap(settings)

        cls.settings = settings
        # tracing stays on once enabled: the new flag is OR-ed into the old one
        cls.trace = cls.trace | coalesce(settings.trace, False)
        if cls.trace:
            # imported only when tracing; rebinds the module-level _Thread
            from pyLibrary.thread.threads import Thread as _Thread

        # normalize cprofile to the long (mapping) form; start the profiler if enabled
        if settings.cprofile is False:
            settings.cprofile = {"enabled": False}
        elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
            if isinstance(settings.cprofile, bool):
                settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}

            import cProfile

            cls.cprofiler = cProfile.Profile()
            cls.cprofiler.enable()

        # normalize profile to the long (mapping) form; switch simple profiling on if enabled
        if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
            from pyLibrary.debugs import profiles

            if isinstance(settings.profile, bool):
                profiles.ON = True
                settings.profile = {"enabled": True, "filename": "profile.tab"}

            if settings.profile.enabled:
                profiles.ON = True

        if settings.constants:
            constants.set(settings.constants)

        if settings.log:
            # replace the main log with a threaded log that feeds a multi-log,
            # stopping any main log that is already there
            cls.logging_multi = TextLog_usingMulti()
            if cls.main_log:
                cls.main_log.stop()
            cls.main_log = TextLog_usingThread(cls.logging_multi)

            # one logger per entry in settings.log
            for log in listwrap(settings.log):
                Log.add_log(Log.new_instance(log))

        # announced last, after the loggers above have been set up
        if settings.cprofile.enabled==True:
            Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))
Esempio n. 15
0
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        The message is also passed to `Log.note` (unless `error_mode` is
        already set); that attempt runs under `suppress_exception`.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; if an
               exception (or a list whose first item is an exception) is
               passed here it is used as the cause instead
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            default_params, cause = {}, default_params

        template_params = dict(unwrap(default_params), **more_params)

        causes = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        caller_trace = exceptions.extract_stack(stack_depth + 1)

        failure = Except(exceptions.ERROR, template, template_params, causes, caller_trace)
        text = unicode(failure)

        # error_mode is raised while logging and put back afterwards
        previous_mode = cls.error_mode
        with suppress_exception:
            if not previous_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=failure,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        cls.error_mode = previous_mode

        sys.stderr.write(text.encode('utf8'))
Esempio n. 16
0
    def vars(self, exclude_where=False, exclude_select=False):
        """
        Collect the variables used by this query: from its source, select,
        edges, groupby, where, sort and window clauses.

        :param exclude_where: skip the where clause
        :param exclude_select: skip the select clause(s)
        :return: variables in query
        """
        def edge_vars(edge):
            # variables referenced by one edge (or groupby) clause
            found = set()
            if isinstance(edge.value, basestring):
                found.add(edge.value)
            domain = edge.domain
            if domain.key:
                found.add(domain.key)
            if domain.where:
                found |= jx_expression(domain.where).vars()
            if edge.range:
                found |= jx_expression(edge.range.min).vars()
                found |= jx_expression(edge.range.max).vars()
            if domain.partitions:
                for part in domain.partitions:
                    if part.where:
                        found |= part.where.vars()
            return found

        output = set()

        # best effort: a source without usable vars() contributes nothing
        try:
            output |= self.frum.vars()
        except Exception:
            pass

        if not exclude_select:
            for clause in listwrap(self.select):
                output |= clause.value.vars()

        for clauses in (self.edges, self.groupby):
            for clause in listwrap(clauses):
                output |= edge_vars(clause)

        if not exclude_where:
            output |= self.where.vars()

        for clause in listwrap(self.sort):
            output |= clause.value.vars()

        # best effort, same as for the source above
        try:
            output |= UNION(w.vars() for w in self.window)
        except Exception:
            pass

        return output
Esempio n. 17
0
 def _select(self, select):
     """
     Build a new Cube for the given select clause(s).

     If any clause has an aggregate other than "none", every selected
     matrix is aggregated and the result has no edges; otherwise the
     selected matrices are reused with this cube's edges.

     :param select: one select clause, or a list of them
     :return: the new Cube
     """
     selects = listwrap(select)
     aggregating = OR(s.aggregate != None and s.aggregate != "none" for s in selects)
     if not aggregating:
         picked = {s.name: self.data[s.value] for s in selects}
         return Cube(select, self.edges, picked)

     reduced = {s.name: Matrix(value=self.data[s.value].aggregate(s.aggregate)) for s in selects}
     return Cube(select, [], reduced)
Esempio n. 18
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER

        Matching documents are found with a search, then one bulk request
        carries an update action per (document, assignment) pair.
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        id_query = {
            "fields": listwrap(schema._routing.path),
            "query": {
                "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()}
            },
            "size": 200000,
        }
        results = self._es.search(id_query)

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for field, value in command.set.items():
            if not is_keyword(field):
                Log.error("Only support simple paths for now")
            if isinstance(value, Mapping) and value.doc:
                action = {"doc": value.doc}
            else:
                action = {"script": "ctx._source." + field + " = " + jx_expression(value).to_ruby()}
            scripts.append(action)

        if not results.hits.hits:
            return

        # bulk body: an action header line followed by its payload line
        updates = []
        for hit in results.hits.hits:
            for action in scripts:
                header = {
                    "update": {
                        "_id": hit._id,
                        "_routing": unwraplist(hit.fields[literal_field(schema._routing.path)]),
                    }
                }
                updates.append(header)
                updates.append(action)

        lines = "\n".join(convert.value2json(u) for u in updates) + "\n"
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=lines.encode("utf-8"),
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"consistency": self.settings.consistency},
        )
        if response.errors:
            failures = [
                detail.error
                for item in response["items"]
                for detail in item.values()
                if detail.status not in (200, 201)
            ]
            Log.error("could not update: {{error}}", error=failures)
Esempio n. 19
0
        def errors(e, _buffer):  # HANDLE ERRORS FROM extend()
            """
            Sort the failure's underlying causes into hopeless ones (they
            contain a HOPELESS marker) and ones that may still succeed.
            While any cause has hope the buffer is kept and a note or
            warning is logged; otherwise the buffer is emptied.
            """
            if e.cause.cause:
                causes = listwrap(e.cause.cause)
                not_possible = [f for f in causes if any(h in f for h in HOPELESS)]
                still_have_hope = [f for f in causes if all(h not in f for h in HOPELESS)]
            else:
                not_possible = [e]
                still_have_hope = []

            if not still_have_hope:
                if not_possible:
                    # THERE IS NOTHING WE CAN DO
                    Log.warning("Not inserted, will not try again", cause=not_possible[0:10:])
                    del _buffer[:]
                return

            if "429 EsRejectedExecutionException[rejected execution (queue capacity" in e:
                Log.note("waiting for ES to be free ({{num}} pending)", num=len(_buffer))
            elif "503 UnavailableShardsException" in e:
                Log.note("waiting for ES to initialize shards ({{num}} pending)", num=len(_buffer))
            else:
                Log.warning("Problem with sending to ES ({{num}} pending)", num=len(_buffer), cause=still_have_hope)
Esempio n. 20
0
 def _convert_window(self, window):
     """
     Convert a window clause into a `Data` record, converting its edges,
     sort, range and where sub-clauses with the matching `_convert_*` methods.

     :param window: the window clause
     :return: `Data` with name, value, edges, sort, aggregate, range, where;
              the name falls back to the value when it is missing
     """
     name = coalesce(window.name, window.value)
     edges = [self._convert_edge(e) for e in listwrap(window.edges)]
     sort = self._convert_sort(window.sort)
     frame = self._convert_range(window.range)
     where = self._convert_where(window.where)
     return Data(
         name=name,
         value=window.value,
         edges=edges,
         sort=sort,
         aggregate=window.aggregate,
         range=frame,
         where=where
     )
Esempio n. 21
0
    def __contains__(self, value):
        """
        True when `value` is a substring of this exception's template or
        message, equals its type, or is contained in any of its causes.
        """
        if isinstance(value, basestring):
            found_in_text = self.template.find(value) >= 0 or self.message.find(value) >= 0
            if found_in_text:
                return True

        if self.type == value:
            return True

        # recurse into the causes through their own `in` test
        return any(value in c for c in listwrap(self.cause))
Esempio n. 22
0
    def insert_new(self, table_name, candidate_key, new_record):
        """
        Insert `new_record` into `table_name` only if no existing row
        matches it on the candidate key.

        :param table_name: target table
        :param candidate_key: column name, or list of column names; a None
                              value in the record is matched with IS Null
        :param new_record: mapping of column name to value
        """
        candidate_key = listwrap(candidate_key)

        def key_match(k):
            # equality test for one key column
            if new_record[k] != None:
                return self.quote_column(k) + "=" + self.quote_value(new_record[k])
            return self.quote_column(k) + " IS Null"

        condition = " AND\n".join([key_match(k) for k in candidate_key])

        insert_clause = (
            "INSERT INTO " + self.quote_column(table_name) + " (" +
            ",".join([self.quote_column(k) for k in new_record.keys()]) +
            ")\n"
        )
        select_clause = (
            "SELECT a.* FROM (SELECT " +
            ",".join([self.quote_value(v) + " " + self.quote_column(k) for k, v in new_record.items()]) +
            " FROM DUAL) a\n"
        )
        # the joined sub-select yields a row only when a match already
        # exists, so `exist IS Null` keeps the insert for the no-match case
        guard_clause = (
            "LEFT JOIN " +
            "(SELECT 'dummy' exist FROM " + self.quote_column(table_name) +
            " WHERE " + condition + " LIMIT 1) b ON 1=1 WHERE exist IS Null"
        )
        self.execute(insert_clause + select_clause + guard_clause, {})
Esempio n. 23
0
def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES

    :param frum: rows to scan
    :param columns: mapping of full column name to Column; filled in here
    :param prefix: list of path steps leading to the rows
    :param nested_path: nested path recorded on every new Column
    :param name_to_column: lookup of already-known columns by full name
    """
    def new_column(full_name):
        # a column whose type is not known yet
        return Column(
            name=full_name,
            table=".",
            es_column=full_name,
            es_index=".",
            type="undefined",
            nested_path=nested_path
        )

    for row in frum:
        row_type = _type_to_name[row.__class__]
        if row_type != "object":
            # a plain value: the column is the prefix itself
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = new_column(full_name)
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
            continue

        for name, value in row.items():
            full_name = join_field(prefix + [name])
            column = name_to_column.get(full_name)
            if not column:
                column = new_column(full_name)
            columns[full_name] = column

            if not isinstance(value, list):
                this_type = _type_to_name[value.__class__]
            elif len(value) == 0:
                this_type = "undefined"
            else:
                # typed by the first element; only a multi-element list of
                # objects counts as nested
                this_type = _type_to_name[value[0].__class__]
                if len(value) > 1 and this_type == "object":
                    this_type = "nested"
            column.type = _merge_type[column.type][this_type]

            if this_type == "object":
                _get_schema_from_list([value], columns, prefix + [name], nested_path, name_to_column)
            elif this_type == "nested":
                outer = listwrap(nested_path)
                newpath = unwraplist([join_field(split_field(outer[0])+[name])]+outer)
                _get_schema_from_list(value, columns, prefix + [name], newpath, name_to_column)
Esempio n. 24
0
 def test_trace_of_simple_raises(self):
     """
     The wrapped exception from problem_a() keeps its template, and this
     test method shows up at position 2 of the wrapped trace.
     """
     try:
         problem_a()
     except Exception as e:
         f = Except.wrap(e)
         self.assertEqual(f.template, "expected exception")
         for i, m in enumerate(listwrap(f.trace).method):
             if m == "test_trace_of_simple_raises":
                 self.assertEqual(i, 2)
                 break
         else:
             # loop finished without finding this method in the trace
             self.fail("expecting stack to show this method")
Esempio n. 25
0
    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
        simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS

        :param edges: the edges to group by
        :return: iterable of (term, value) pairs; term maps edge name to
                 partition key, value is a Cube over the remaining edges
                 (or the bare value when no edges remain)
        """
        edges = FlatList([_normalize_edge(e) for e in edges])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        # NOTE(review): stacked and remainder split self.edges between them,
        # so this sum always equals len(self.edges) and the error cannot
        # fire -- confirm what the check was meant to compare
        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")
        # CACHE SOME RESULTS
        keys = edges.name
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[getKey[i](p) for p in e.domain.partitions+([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]

        def coord2term(coord):
            # translate a coordinate into {edge name: partition key}
            output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
            return output

        if isinstance(self.select, list):
            selects = listwrap(self.select)
            index, v = zip(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                # same method as for the first select (was `group_by`)
                i, v = zip(*self.data[s.name].groupby(selector))
                values.append(v)

            output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (
                    coord2term(coord),
                    v
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )
        else:
            output = (
                (
                    coord2term(coord),
                    Cube(self.select, remainder, v)
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )

        return output
Esempio n. 26
0
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Group `data` by key values, or chunk it by size.

    A `Container` is asked to group itself. If any of the size parameters
    is given, the work is handed to `groupby_min_max_size` (`size` is used
    as `max_size`) and `keys` is ignored.

    :param data: rows to group
    :param keys: name, or list of names, of the properties to group by
    :param size: fixed group size
    :param min_size: passed to `groupby_min_max_size`
    :param max_size: passed to `groupby_min_max_size`
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: return list of (keys, values) PAIRS, WHERE
                 keys IS IN LEAF FORM (FOR USE WITH {"eq": terms} OPERATOR
                 values IS GENERATOR OF ALL VALUE THAT MATCH keys
             `Null` is returned when there is no data
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            # sorting first puts equal keys next to each other
            from pyLibrary.queries import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        accessor = jx_expression_to_function(jx_expression({"tuple": keys}))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

        def _output():
            # emit one (group, rows) pair per run of rows with the same key
            start = 0
            prev = accessor(data[0])
            for i, d in enumerate(data):
                curr = accessor(d)
                if curr != prev:
                    yield Data(dict(zip(keys, prev))), data[start:i:]
                    start = i
                    prev = curr
            # the final run
            yield Data(dict(zip(keys, prev))), data[start::]

        # the generator runs lazily, after this try block has been left
        return _output()
    except Exception as e:
        Log.error("Problem grouping", cause=e)
Esempio n. 27
0
    def test_full_trace_on_wrap(self):
        """
        The wrapped exception from problem_b() keeps its template, and this
        test method shows up at position 1 of the wrapped trace.
        """
        try:
            problem_b()
        except Exception as e:
            cause = Except.wrap(e)
            self.assertEqual(cause.template, "expected exception")

            for i, m in enumerate(listwrap(cause.trace).method):
                if m == "test_full_trace_on_wrap":
                    self.assertEqual(i, 1)
                    break
            else:
                # loop finished without finding this method in the trace
                self.fail("expecting stack to show this method")
Esempio n. 28
0
    def _groupby(self, edges):
        """
        RETURNS LIST OF (coord, values) TUPLES, WHERE
            coord IS THE INDEX INTO self CUBE (-1 INDEX FOR COORDINATES NOT GROUPED BY)
            values ALL VALUES THAT BELONG TO THE SLICE

        NOTE(review): every branch calls `coord2term`, which is not defined
        inside this method -- confirm it exists at module level, or whether
        the raw coordinate was meant to be returned as described above.
        """
        edges = FlatList([_normalize_edge(e) for e in edges])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        # NOTE(review): stacked and remainder split self.edges between them,
        # so this sum always equals len(self.edges) and the error cannot fire
        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")
        # CACHE SOME RESULTS
        keys = edges.name
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[getKey[i](p) for p in e.domain.partitions+([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]

        if isinstance(self.select, list):
            selects = listwrap(self.select)
            index, v = zip(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                # same method as for the first select (was `group_by`)
                i, v = zip(*self.data[s.name].groupby(selector))
                values.append(v)

            output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (
                    coord2term(coord),
                    v
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )
        else:
            output = (
                (
                    coord2term(coord),
                    Cube(self.select, remainder, v)
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )

        return output
Esempio n. 29
0
def is_deepop(es, query):
    """
    Decide whether `query` is a "deep" operation.

    It is not when it has edges or a groupby, nor when every select clause
    aggregates. Otherwise it is deep exactly when the name of the query's
    source has more than one path step.

    :param es: not used here
    :param query: the query to inspect
    :return: True or False
    """
    if query.edges or query.groupby:
        return False

    selects = listwrap(query.select)
    if all(s.aggregate not in (None, "none") for s in selects):
        return False

    # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
    # (a column-based check is sketched but disabled:)
    # vars_ = query_get_all_vars(query)
    # columns = query.frum.get_columns()
    # if any(c for c in columns if len(c.nested_path) != 1 and c.name in vars_):
    #    return True
    return len(split_field(query.frum.name)) > 1
Esempio n. 30
0
    def send_email(self,
            from_address=None,
            to_address=None,
            subject=None,
            text_data=None,
            html_data=None
    ):
        """Sends an email.

        from_address is one email address; to_address is one address or a
        list of them. Anything not given falls back to this object's
        settings (sender: "from" / from_address; recipients: to_address /
        to_addrs; subject: subject).

        text_data and html_data are both strings.  You can specify one or both.
        If you specify both, the email will be sent as a MIME multipart
        alternative, i.e., the recipient will see the HTML content if their
        viewer supports it; otherwise they'll see the text content.

        Raises Exception when sender or recipients are missing, or when
        neither text_data nor html_data is given.
        """

        settings = self.settings

        from_address = coalesce(from_address, settings["from"], settings.from_address)
        to_address = listwrap(coalesce(to_address, settings.to_address, settings.to_addrs))

        if not (from_address and to_address):
            raise Exception("Both from_addr and to_addrs must be specified")
        if not (text_data or html_data):
            raise Exception("Must specify either text_data or html_data")

        if text_data and html_data:
            msg = MIMEMultipart('alternative')
            msg.attach(MIMEText(text_data, 'plain'))
            msg.attach(MIMEText(html_data, 'html'))
        elif html_data:
            msg = MIMEText(html_data, 'html')
        else:
            msg = MIMEText(text_data)

        msg['Subject'] = coalesce(subject, settings.subject)
        msg['From'] = from_address
        msg['To'] = ', '.join(to_address)

        def deliver():
            self.server.sendmail(from_address, to_address, msg.as_string())

        if self.server:
            # CALL AS PART OF A SMTP SESSION
            deliver()
        else:
            # CALL AS STAND-ALONE
            with self:
                deliver()