def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # possible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any more parameters (which will overwrite default_params)
    :return: never returns; always raises Except
    """
    # CALLED AS error(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)
    # BUGFIX: removed dead code -- `add_to_trace` was hard-coded False, so the
    # `cause[0].trace.extend(trace[1:])` branch could never execute
    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def map(self, map_):
    # RETURN A NEW QueryOp WITH EVERY EXPRESSION PASSED THROUGH map_
    # (TYPICALLY A VARIABLE-RENAMING MAP); self IS NOT MODIFIED DIRECTLY

    def map_select(s, map_):
        # MAP ONE SELECT CLAUSE'S VALUE, KEEPING ITS OTHER PROPERTIES
        return set_default({"value": s.value.map(map_)}, s)

    def map_edge(e, map_):
        # MAP AN EDGE: ITS PARTITION FILTERS, DOMAIN WHERE, VALUE AND RANGE
        partitions = unwraplist([
            set_default({"where": p.where.map(map_)}, p)
            for p in e.domain.partitions
        ])

        domain = copy(e.domain)
        domain.where = e.domain.where.map(map_)
        domain.partitions = partitions

        edge = copy(e)
        edge.value = e.value.map(map_)
        edge.domain = domain
        if e.range:
            # NOTE(review): copy(e) is shallow, so edge.range IS e.range --
            # these assignments also mutate the original edge; confirm intent
            edge.range.min = e.range.min.map(map_)
            edge.range.max = e.range.max.map(map_)
        return edge

    return QueryOp(
        "from",
        frum=self.frum.map(map_),
        select=wrap([map_select(s, map_) for s in listwrap(self.select)]),
        edges=wrap([map_edge(e, map_) for e in self.edges]),
        groupby=wrap([g.map(map_) for g in self.groupby]),
        window=wrap([w.map(map_) for w in self.window]),
        where=self.where.map(map_),
        sort=wrap([map_select(s, map_) for s in listwrap(self.sort)]),
        limit=self.limit,
        format=self.format,
    )
def value_compare(l, r, ordering=1):
    """
    Three-way compare; None is the least value.

    :param l: LHS
    :param r: RHS
    :param ordering: +1 for ascending, -1 to invert the sort order
    :return: negative if l < r, zero if equal, positive if l > r
    """
    if l == None:
        if r == None:
            return 0
        else:
            return - ordering
    elif r == None:
        return ordering

    if isinstance(l, list) or isinstance(r, list):
        for a, b in zip(listwrap(l), listwrap(r)):
            c = value_compare(a, b) * ordering
            if c != 0:
                return c
        # BUGFIX: zip() truncates to the shorter sequence, so a strict prefix
        # used to compare equal; break the tie on length (shorter sorts first),
        # matching the behavior of the other value_compare variant in this code
        if len(listwrap(l)) < len(listwrap(r)):
            return - ordering
        elif len(listwrap(l)) > len(listwrap(r)):
            return ordering
        return 0
    elif isinstance(l, Mapping):
        if isinstance(r, Mapping):
            # COMPARE MAPPINGS OVER THE UNION OF THEIR KEYS
            for k in set(l.keys()) | set(r.keys()):
                c = value_compare(l[k], r[k]) * ordering
                if c != 0:
                    return c
            return 0
        else:
            # MAPPINGS SORT AFTER NON-MAPPINGS
            return 1
    elif isinstance(r, Mapping):
        return -1
    else:
        # PLAIN VALUES: DELEGATE TO PYTHON 2 cmp()
        return cmp(l, r) * ordering
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # possible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too
    """
    # CALLED AS error(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)
    # BUGFIX: removed dead code -- `add_to_trace` was hard-coded False, so the
    # `cause[0].trace.extend(trace[1:])` branch could never execute
    e = Except(ERROR, template, params, cause, trace)
    raise e
def get_all_vars(query):
    """Collect every variable referenced by the query's select, edges, groupby and where clauses."""
    variables = []
    for clause, extractor in (
        (query.select, select_get_all_vars),
        (query.edges, edges_get_all_vars),
        (query.groupby, edges_get_all_vars),
    ):
        for item in listwrap(clause):
            variables.extend(extractor(item))
    variables.extend(expressions.get_all_vars(query.where))
    return variables
def argparse(defs):
    """Build an ArgumentParser from a list of argument definitions; return parsed args as a wrapped dict."""
    parser = _argparse.ArgumentParser()
    for definition in listwrap(defs):
        params = definition.copy()
        flag_names = params.name
        params.name = None  # remove so the remaining keys map onto add_argument kwargs
        parser.add_argument(*unwrap(listwrap(flag_names)), **params)
    namespace = parser.parse_args()
    return wrap({attr: getattr(namespace, attr) for attr in vars(namespace)})
def _set_op(self, query):
    # COMPOSE A PLAIN (NO GROUP-BY) SELECT STATEMENT
    all_selects = listwrap(query.select)
    if all_selects[0].value == ".":
        select_clauses = ["*"]
    else:
        select_clauses = [
            s.value.to_sql() + " AS " + quote_table(s.name)
            for s in all_selects
        ]
    for window in query.window:
        select_clauses.append(self._window_op(self, query, window))

    from_clause = " FROM " + quote_table(self.name) + " a"
    where_clause = "\nWHERE " + query.where.to_sql()
    return "SELECT " + ",\n".join(select_clauses) + from_clause + where_clause
def errors(e, _buffer):  # HANDLE ERRORS FROM extend()
    # SPLIT THE CAUSES INTO THOSE THAT CAN NEVER SUCCEED (MAPPING/PARSE
    # ERRORS) AND THOSE THAT MAY SUCCEED ON RETRY; ONLY THE HOPELESS CASE
    # DISCARDS THE PENDING BUFFER
    if e.cause.cause:
        # NOTE(review): `"JsonParseException" in f` is a substring test against
        # each cause -- presumably Except supports __contains__; confirm
        not_possible = [f for f in listwrap(e.cause.cause) if "JsonParseException" in f or "400 MapperParsingException" in f]
        still_have_hope = [f for f in listwrap(e.cause.cause) if "JsonParseException" not in f and "400 MapperParsingException" not in f]
    else:
        # NO DEEPER CAUSE: TREAT THE WHOLE ERROR AS NOT RETRYABLE
        not_possible = [e]
        still_have_hope = []

    if still_have_hope:
        Log.warning("Problem with sending to ES", cause=still_have_hope)
    elif not_possible:
        # THERE IS NOTHING WE CAN DO
        Log.warning("Not inserted, will not try again", cause=not_possible[0:10:])
        del _buffer[:]
def value_compare(l, r, ordering=1):
    """
    SORT VALUES, NULL IS THE LEAST VALUE
    :param l: LHS
    :param r: RHS
    :param ordering: (-1, 0, 1) TO AFFECT SORT ORDER
    :return: The return value is negative if x < y, zero if x == y and strictly positive if x > y

    NOTE(review): with ordering=1 this returns +ordering when only l is None,
    which sorts None AFTER values -- that conflicts with the summary above;
    confirm which is intended.
    """
    if l == None:
        if r == None:
            return 0
        else:
            return ordering
    elif r == None:
        return - ordering

    if isinstance(l, list) or isinstance(r, list):
        # PAIRWISE COMPARE, THEN BREAK TIES ON LENGTH (SHORTER SORTS FIRST)
        for a, b in zip(listwrap(l), listwrap(r)):
            c = value_compare(a, b) * ordering
            if c != 0:
                return c

        if len(l) < len(r):
            return - ordering
        elif len(l) > len(r):
            return ordering
        else:
            return 0
    elif isinstance(l, builtin_tuple) and isinstance(r, builtin_tuple):
        # TUPLES: PAIRWISE ONLY; UNEQUAL LENGTHS COMPARE EQUAL ON COMMON PREFIX
        for a, b in zip(l, r):
            c = value_compare(a, b) * ordering
            if c != 0:
                return c
        return 0
    elif isinstance(l, Mapping):
        if isinstance(r, Mapping):
            # COMPARE MAPPINGS KEY-BY-KEY IN SORTED KEY ORDER
            for k in sorted(set(l.keys()) | set(r.keys())):
                c = value_compare(l.get(k), r.get(k)) * ordering
                if c != 0:
                    return c
            return 0
        else:
            # MAPPINGS SORT AFTER NON-MAPPINGS
            return 1
    elif isinstance(r, Mapping):
        return -1
    else:
        # PLAIN VALUES: DELEGATE TO PYTHON 2 cmp()
        return cmp(l, r) * ordering
def __getitem__(self, item):
    # LOOK FOR A MATCHING SELECT COLUMN FIRST, THEN A MATCHING EDGE PARTITION;
    # RETURNS None IMPLICITLY WHEN NEITHER MATCHES
    for select in listwrap(self.cube.select):
        if select.name == item:
            return self.cube.data[item]
    for dim, edge in enumerate(self.cube.edges):
        if edge.name == item:
            # NOTE(review): attribute is `partition` (singular) here -- confirm
            return edge.domain.partition[self.coord[dim]]
def _normalize_groupby(groupby, schema=None):
    # NORMALIZE EACH GROUPBY CLAUSE; None PASSES THROUGH UNTOUCHED
    if groupby == None:
        return None
    normalized = wrap([_normalize_group(g, schema=schema) for g in listwrap(groupby)])
    if any(g == None for g in normalized):
        Log.error("not expected")
    return normalized
def es_fieldop(es, query):
    # PLAIN FIELD-FETCH (NO AGGREGATION): TRANSLATE query TO AN ES 1.4
    # "filtered" QUERY AND DELEGATE ROW EXTRACTION TO extract_rows()
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)
    es_query.query = {
        "filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(qb_expression_to_esfilter(query.where))
        }
    }
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    source = "fields"
    for s in select.value:
        if s == "*":
            # WHOLE DOCUMENT: SWITCH TO _source RETRIEVAL
            es_query.fields = None
            source = "_source"
        elif s == ".":
            es_query.fields = None
            source = "_source"
        elif isinstance(s, basestring) and is_keyword(s):
            es_query.fields.append(s)
        elif isinstance(s, list) and es_query.fields is not None:
            es_query.fields.extend(s)
        elif isinstance(s, Mapping) and es_query.fields is not None:
            es_query.fields.extend(s.values())
        elif es_query.fields is not None:
            es_query.fields.append(s)
    # NOTE(review): this overwrites the es_query.sort assigned above -- confirm
    # the earlier qb_sort_to_es_sort() call is intentional
    es_query.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    return extract_rows(es, es_query, source, select, query)
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    Log a warning: template plus params, with chained cause and caller trace.

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # CALLED AS warning(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    # IDIOM FIX: membership test does not need .keys()
    if "values" in more_params:
        Log.error("Can not handle a logging parameter by name `values`")
    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template, params, cause, trace)
    Log.note(
        "{{error|unicode}}",
        error=e,
        log_context=set_default({"context": exceptions.WARNING}, log_context),
        stack_depth=stack_depth + 1
    )
def get_all_vars(query, exclude_where=False):
    """
    Collect every variable referenced by the query.

    :param query: the query to scan
    :param exclude_where: True to skip the where clause
    :return: all variables in use by query
    """
    found = []
    for item in listwrap(query.select):
        found.extend(select_get_all_vars(item))
    for item in listwrap(query.edges):
        found.extend(edges_get_all_vars(item))
    for item in listwrap(query.groupby):
        found.extend(edges_get_all_vars(item))
    if exclude_where:
        return found
    found.extend(expressions.get_all_vars(query.where))
    return found
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    return list of (keys, values) pairs where
        group by the set of keys
        values IS LIST OF ALL data that has those keys
    contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """
    # SIZE-BASED GROUPING TAKES PRECEDENCE OVER KEY-BASED GROUPING
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Container):
        # CONTAINERS KNOW HOW TO GROUP THEMSELVES
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            # SORT SO itertools.groupby SEES EACH KEY EXACTLY ONCE
            data = sorted(data, key=get_key)

        def _output():
            # LAZY GENERATOR OF (group Dict, wrapped values) PAIRS
            for g, v in itertools.groupby(data, get_key):
                group = Dict()
                for k, gg in zip(keys, g):
                    group[k] = gg
                yield (group, wrap(v))

        return _output()
    except Exception, e:
        Log.error("Problem grouping", e)
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        if is_keyword(s.value):
            # PLAIN FIELD: COUNT VIA A terms FACET, EXCLUDING MISSING VALUES
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    # PULL THE TOTAL FROM EACH FACET INTO A SINGLE-VALUE MATRIX
    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def query_get_all_vars(query, exclude_where=False):
    """
    Collect every variable referenced by the query, as a set.

    :param query: the query to scan
    :param exclude_where: True to skip the where clause
    :return: all variables in use by query
    """
    found = set()
    for item in listwrap(query.select):
        found |= select_get_all_vars(item)
    for clause in (query.edges, query.groupby):
        for item in listwrap(clause):
            found |= edges_get_all_vars(item)
    if exclude_where:
        return found
    return found | qb_expression(query.where).vars()
def split_expression_by_depth(where, schema, map_, output=None, var_to_depth=None):
    """
    It is unfortunate that ES can not handle expressions that
    span nested indexes.  This will split your where clause
    returning {"and": [filter_depth0, filter_depth1, ...]}
    """
    vars_ = where.vars()

    if var_to_depth is None:
        # TOP-LEVEL CALL: BUILD THE DEPTH MAP AND THE OUTPUT SLOTS
        if not vars_:
            return Null
        # MAP VARIABLE NAMES TO HOW DEEP THEY ARE
        var_to_depth = {v: len(listwrap(schema[v].nested_path)) for v in vars_}
        all_depths = set(var_to_depth.values())
        output = wrap([[] for _ in range(MAX(all_depths) + 1)])
    else:
        all_depths = set(var_to_depth[v] for v in vars_)

    if len(all_depths) == 1:
        # WHOLE EXPRESSION LIVES AT ONE DEPTH: MAP AND PLACE IT
        output[list(all_depths)[0]] += [where.map(map_)]
    elif isinstance(where, AndOp):
        # RECURSE: EACH CONJUNCT MAY LIVE AT A DIFFERENT DEPTH
        for a in where.terms:
            split_expression_by_depth(a, schema, map_, output, var_to_depth)
    else:
        Log.error("Can not handle complex where clause")

    return output
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,  # stack trace offset (==1 if you do not want to report self)
    **more_params
):
    # LOG A WARNING: TEMPLATE PLUS PARAMS, WITH CHAINED CAUSE AND CALLER TRACE
    # CALLED AS warning(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)

    e = Except(WARNING, template, params, cause, trace)
    Log.note(
        unicode(e),
        {
            "warning": {  # REDUNDANT INFO
                "template": template,
                "params": params,
                "cause": cause,
                "trace": trace
            }
        },
        stack_depth=stack_depth + 1
    )
def es_setop(es, query):
    # PLAIN (NO AGGREGATION) SET OPERATION: FETCH FIELD/SCRIPT VALUES FROM ES
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            # WHOLE DOCUMENT: RETRIEVE _source INSTEAD OF INDIVIDUAL FIELDS
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            # EXPRESSION: COMPILE TO A SCRIPT FIELD
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression_to_ruby(s.value)}

    return extract_rows(es, es_query, source, select, query)
def is_terms(query):
    # A QUERY IS A "terms" QUERY WHEN IT HAS NO SELECT, OR WHEN EVERY SELECT
    # USES ONLY A SIMPLE none/count AGGREGATE
    selects = listwrap(query.select)
    simple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in selects)
    return True if simple else False
def select(self, select):
    # PROJECT self.data DOWN TO THE GIVEN select CLAUSE(S), RETURNING A NEW
    # ListContainer (OR self WHEN THE SELECT IS THE IDENTITY ".")
    selects = listwrap(select)

    if not all(isinstance(s.value, Variable) for s in selects):
        Log.error("selecting on structure, or expressions, not supported yet")
    if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
        new_schema = self.schema
        if selects[0].name == ".":
            return self
    else:
        new_schema = None

    if isinstance(select, list):
        # MANY SELECTS: BUILD ONE OUTPUT Dict PER ROW
        push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]

        def selector(d):
            output = Dict()
            for n, p in push_and_pull:
                output[n] = p(wrap(d))
            return unwrap(output)

        new_data = map(selector, self.data)
    else:
        # SINGLE SELECT: EXTRACT THE ONE VALUE PER ROW
        select_value = jx_expression_to_function(select.value)
        new_data = map(select_value, self.data)

    return ListContainer("from " + self.name, data=new_data, schema=new_schema)
def is_aggsop(es, query):
    # AGGS PROCESSING REQUIRES ES 1.4-1.7 AND SOME EDGE/GROUPBY/AGGREGATE CLAUSE
    es.cluster.get_metadata()
    version_ok = any(map(es.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."]))
    wants_aggs = (
        query.edges
        or query.groupby
        or any(a != None and a != "none" for a in listwrap(query.select).aggregate)
    )
    if version_ok and wants_aggs:
        return True
    return False
def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        for name, value in d.items():
            # MERGE THIS VALUE'S TYPE INTO WHAT WE HAVE SEEN FOR THIS NAME
            agg_type = names.get(name, "undefined")
            this_type = _type_to_name[value.__class__]
            new_type = _merge_type[agg_type][this_type]
            names[name] = new_type

            if this_type == "object":
                # INNER OBJECT: RECURSE WITH DEEPER PREFIX, SAME NESTED PATH
                _get_schema_from_list([value], columns, prefix + [name], nested_path)
            elif this_type == "nested":
                # NESTED ARRAY: PREPEND THE NEW PATH SEGMENT, THEN RECURSE
                np = listwrap(nested_path)
                newpath = unwraplist([".".join((np[0], name))] + np)
                _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            table=".",
            name=full_name,
            abs_name=full_name,
            type=t,
            nested_path=nested_path
        )
        columns[column.name] = column
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum
    output = DictList()
    for e in coalesce(query.edges, query.groupby, []):
        if e.value:
            # EXPRESSION EDGE: MAP ITS VARIABLES TO ABSOLUTE COLUMN NAMES
            e = e.copy()
            e.value = qb_expression(e.value)
            vars_ = e.value.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
            e.value = e.value.map({schema[v].name: schema[v].abs_name for v in vars_})
        else:
            # DIMENSION EDGE: REWRITE ITS FIELD LIST TO ABSOLUTE NAMES
            vars_ = e.domain.dimension.fields
            e.domain.dimension = e.domain.dimension.copy()
            e.domain.dimension.fields = [schema[v].abs_name for v in vars_]

        # ALL OF AN EDGE'S VARIABLES MUST LIVE AT ONE NESTED DEPTH
        depths = set(len(listwrap(schema[v].nested_path)) for v in vars_)
        if len(depths) > 1:
            Log.error("expression {{expr}} spans tables, can not handle", expr=e.value)
        depth = list(depths)[0]
        while len(output) <= depth:
            output.append([])
        output[depth].append(AggsDecoder(e, query))
    return output
def read_settings(filename=None, defs=None):
    """
    Load application settings from a JSON file.

    :param filename: path to the settings file; when None, the path comes from
        the --settings command-line argument (default ./settings.json)
    :param defs: extra argparse argument definitions
    :return: wrapped settings, with parsed command-line args at settings.args
    """
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            # BUGFIX: template had a garbled "{(unknown)}" placeholder and a
            # "Can not file" typo; use the parameter actually being passed
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
def __init__(self, **desc):
    """
    Range domain: either built from explicit `partitions`, or generated
    from `min`, `max` and `interval`.
    """
    Domain.__init__(self, **desc)
    self.type = "range"
    self.NULL = Null

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")

        parts = listwrap(self.partitions)
        for i, p in enumerate(parts):
            self.min = Math.min(self.min, p.min)
            self.max = Math.max(self.max, p.max)
            if p.dataIndex != None and p.dataIndex != i:
                Log.error("Expecting `dataIndex` to agree with the order of the parts")
            if p[self.key] == None:
                Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
            p.dataIndex = i

        # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
        # BUGFIX: product(parts, parts) includes p==q, which made every
        # non-degenerate partition (min < max) "overlap" with itself;
        # skip the self-comparison
        for p, q in itertools.product(parts, parts):
            if p is not q and p.min <= q.min and q.min < p.max:
                Log.error("partitions overlap!")

        self.partitions = parts
        return
    elif any([self.min == None, self.max == None, self.interval == None]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(frange(self.min, self.max, self.interval))
    ])
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # possible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # CALLED AS fatal(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    # error_mode GUARDS AGAINST RECURSIVE ERROR REPORTING
    error_mode = cls.error_mode
    try:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    except Exception:
        pass
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))
def store_data(path):
    # FLASK ENDPOINT: AUTHENTICATE (HAWK, OR PATTERN-MATCH FOR PUBLIC PATHS),
    # AUTHORIZE, STORE THE POSTED JSON, AND RESPOND WITH THE STORAGE LINK
    # NOTE(review): the outer `try:` has no visible except/finally -- the
    # handler appears truncated in this copy of the source; confirm
    try:
        request = flask.request
        auth = request.headers.get('Authorization')
        if not auth:
            # USE PATTERN MATCHING AUTH
            for c in all_creds:
                if c.path == path:
                    return store_public_data(path, c)
            raise Log.error(
                "No authentication provided. path={{path}} data.length={{length}}",
                path=path,
                length=len(request.get_data()),
            )

        try:
            receiver = Receiver(
                lookup_credentials,
                auth,
                request.url,
                request.method,
                content=request.get_data(),
                content_type=request.headers['Content-Type'],
                seen_nonce=seen_nonce
            )
        except Exception, e:
            e = Except.wrap(e)
            raise Log.error(
                "Authentication failed. path={{path}} data.length={{length}}\n{{auth|indent}}",
                path=path,
                length=len(request.get_data()),
                auth=auth,
                cause=e
            )

        permissions = lookup_user(receiver.parsed_header["id"])
        if path not in listwrap(permissions.resources):
            Log.error("{{user}} not allowed access to {{resource}}", user=permissions.hawk.id, resource=path)

        link, id = submit_data(path, permissions, request.json)

        response_content = convert.unicode2utf8(convert.value2json({
            "link": link,
            "etl": {"id": id}
        }))
        # SIGN THE RESPONSE SO THE CLIENT CAN VERIFY THE SERVER
        receiver.respond(
            content=response_content,
            content_type=RESPONSE_CONTENT_TYPE
        )

        return Response(
            response_content,
            status=200,
            headers={
                b'Server-Authorization': receiver.response_header,
                b'content-type': RESPONSE_CONTENT_TYPE
            }
        )
def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    term2Parts = packed_term.term2parts

    data = es09.util.post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR qb INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = qb.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception, e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass
    # NOTE(review): no value is returned here -- the visible text may be
    # truncated; a Cube built from `output` would be the expected result
def groupby(self, edges):
    """
    SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
    simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS
    """
    edges = DictList([_normalize_edge(e) for e in edges])

    stacked = [e for e in self.edges if e.name in edges.name]
    remainder = [e for e in self.edges if e.name not in edges.name]
    selector = [1 if e.name in edges.name else 0 for e in self.edges]

    if len(stacked) + len(remainder) != len(self.edges):
        Log.error("can not find some edges to group by")

    # CACHE SOME RESULTS
    keys = edges.name
    getKey = [e.domain.getKey for e in self.edges]
    lookup = [[getKey[i](p) for p in e.domain.partitions + ([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]

    def coord2term(coord):
        # TRANSLATE A CUBE COORDINATE INTO A {edge_name: partition_key} Dict
        output = wrap_dot({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
        return output

    if isinstance(self.select, list):
        selects = listwrap(self.select)
        index, v = zip(*self.data[selects[0].name].groupby(selector))

        coord = wrap([coord2term(c) for c in index])

        values = [v]
        for s in selects[1::]:
            # NOTE(review): `group_by` here vs `groupby` above -- confirm both
            # methods exist on Matrix, or this is a typo
            i, v = zip(*self.data[s.name].group_by(selector))
            values.append(v)

        output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
    elif not remainder:
        # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
        output = (
            (
                coord2term(coord),
                v
            )
            for coord, v in self.data[self.select.name].groupby(selector)
        )
    else:
        output = (
            (
                coord2term(coord),
                Cube(self.select, remainder, v)
            )
            for coord, v in self.data[self.select.name].groupby(selector)
        )

    return output
def _select(self, select):
    # APPLY THE SELECT CLAUSE(S): AGGREGATING SELECTS COLLAPSE TO A
    # ZERO-DIMENSION CUBE, PLAIN SELECTS KEEP THE EXISTING EDGES
    selects = listwrap(select)
    if OR(s.aggregate != None and s.aggregate != "none" for s in selects):
        aggregated = {}
        for s in selects:
            aggregated[s.name] = Matrix(value=self.data[s.value].aggregate(s.aggregate))
        return Cube(select, [], aggregated)

    plain = {s.name: self.data[s.value] for s in selects}
    return Cube(select, self.edges, plain)
def _groupby(self, edges):
    """
    RETURNS LIST OF (coord, values) TUPLES, WHERE
        coord IS THE INDEX INTO self CUBE (-1 INDEX FOR COORDINATES NOT GROUPED BY)
        values ALL VALUES THAT BELONG TO THE SLICE
    """
    edges = DictList([_normalize_edge(e) for e in edges])

    stacked = [e for e in self.edges if e.name in edges.name]
    remainder = [e for e in self.edges if e.name not in edges.name]
    selector = [1 if e.name in edges.name else 0 for e in self.edges]

    if len(stacked) + len(remainder) != len(self.edges):
        Log.error("can not find some edges to group by")

    # CACHE SOME RESULTS
    keys = edges.name
    getKey = [e.domain.getKey for e in self.edges]
    lookup = [[getKey[i](p) for p in e.domain.partitions + ([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]

    # BUGFIX: coord2term WAS REFERENCED BELOW BUT NEVER DEFINED IN THIS SCOPE
    # (NameError at runtime); defined here, same as the sibling groupby()
    def coord2term(coord):
        # TRANSLATE A CUBE COORDINATE INTO A {edge_name: partition_key} Dict
        return wrap_dot({keys[i]: lookup[i][c] for i, c in enumerate(coord)})

    if isinstance(self.select, list):
        selects = listwrap(self.select)
        index, v = zip(*self.data[selects[0].name].groupby(selector))

        coord = wrap([coord2term(c) for c in index])

        values = [v]
        for s in selects[1::]:
            i, v = zip(*self.data[s.name].group_by(selector))
            values.append(v)

        output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
    elif not remainder:
        # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
        output = (
            (
                coord2term(coord),
                v
            )
            for coord, v in self.data[self.select.name].groupby(selector)
        )
    else:
        output = (
            (
                coord2term(coord),
                Cube(self.select, remainder, v)
            )
            for coord, v in self.data[self.select.name].groupby(selector)
        )

    return output
def is_deepop(es, query):
    # NESTED ("deep") QUERIES: NO EDGES/GROUPBY, AT LEAST ONE NON-AGGREGATING
    # SELECT, AND A MULTI-SEGMENT TABLE NAME (INDICATING A NESTED PATH)
    if query.edges or query.groupby:
        return False
    if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
        return False
    # ASSUME IT IS NESTED IF THE TABLE NAME HAS MORE THAN ONE SEGMENT
    return len(split_field(query.frum.name)) > 1
def compare_to_expected(query, result, expect):
    # TEST HELPER: NORMALIZE result AND expect IN PLACE (COLUMN ORDER, ROW
    # SORT ORDER) SO THEY CAN BE COMPARED DIRECTLY AFTERWARD
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = zip(*zip(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(result.header))
        ))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            # REORDER result COLUMNS TO MATCH expect
            columns = zip(*unwrap(result.data))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            # NO EXPLICIT SORT: CANONICALIZE ROW ORDER ON BOTH SIDES
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query)

        if not query.sort:
            try:
                # result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(
                    set(jx.get_columns(result.data, leaves=True)) | set(jx.get_columns(expect.data, leaves=True)),
                    "name"
                )
            except Exception:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception, _:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception, _:
                    pass
def send_email(self,
               from_address=None,
               to_address=None,
               subject=None,
               text_data=None,
               html_data=None
):
    """Sends an email.

    from_address is an email address; to_address is a list of email addresses.
    Addresses can be plain (e.g. "*****@*****.**") or with real names
    (e.g. "John Smith <*****@*****.**>").

    text_data and html_data are both strings.  You can specify one or both.
    If you specify both, the email will be sent as a MIME multipart
    alternative, i.e., the recipient will see the HTML content if his
    viewer supports it; otherwise he'll see the text content.
    """
    settings = self.settings

    # FALL BACK TO CONFIGURED ADDRESSES WHEN NOT GIVEN EXPLICITLY
    from_address = coalesce(from_address, settings["from"], settings.from_address)
    to_address = listwrap(coalesce(to_address, settings.to_address, settings.to_addrs))

    if not from_address or not to_address:
        raise Exception("Both from_addr and to_addrs must be specified")
    if not text_data and not html_data:
        raise Exception("Must specify either text_data or html_data")

    if not html_data:
        msg = MIMEText(text_data)
    elif not text_data:
        msg = MIMEText(html_data, 'html')
    else:
        # BOTH PROVIDED: multipart/alternative, TEXT FIRST, HTML PREFERRED
        msg = MIMEMultipart('alternative')
        msg.attach(MIMEText(text_data, 'plain'))
        msg.attach(MIMEText(html_data, 'html'))

    msg['Subject'] = coalesce(subject, settings.subject)
    msg['From'] = from_address
    msg['To'] = ', '.join(to_address)

    if self.server:
        # CALL AS PART OF A SMTP SESSION
        self.server.sendmail(from_address, to_address, msg.as_string())
    else:
        # CALL AS STAND-ALONE
        with self:
            self.server.sendmail(from_address, to_address, msg.as_string())
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    return list of (keys, values) pairs where
        group by the set of keys
        values IS LIST OF ALL data that has those keys
    contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """
    # SIZE-BASED GROUPING TAKES PRECEDENCE OVER KEY-BASED GROUPING
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    keys = listwrap(keys)

    def get_keys(d):
        # EXTRACT THE KEY VALUES FROM ONE DATUM AS A Dict
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        # SINGLE PASS: START A NEW GROUP EACH TIME THE KEY CHANGES
        try:
            if not data:
                return wrap([])

            agg = DictList()
            acc = DictList()
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    acc = [d]
                else:
                    acc.append(d)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception, e:
            Log.error("Problem grouping contiguous values", e)
    # NOTE(review): no non-contiguous path is visible after this point -- the
    # function appears truncated in this copy of the source; confirm
def insert_new(self, table_name, candidate_key, new_record):
    # INSERT new_record ONLY WHEN NO EXISTING ROW MATCHES candidate_key
    # (INSERT ... SELECT ... LEFT JOIN GUARD); VALUES AND IDENTIFIERS GO
    # THROUGH quote_value/quote_column, WHICH IS WHAT PREVENTS SQL INJECTION
    candidate_key = listwrap(candidate_key)

    # MATCH CONDITION, WITH NULL-SAFE EQUALITY FOR None VALUES
    condition = " AND\n".join([
        self.quote_column(k) + "=" + self.quote_value(new_record[k])
        if new_record[k] != None
        else self.quote_column(k) + " IS Null"
        for k in candidate_key
    ])
    command = "INSERT INTO " + self.quote_column(table_name) + " (" + \
              ",".join([self.quote_column(k) for k in new_record.keys()]) + \
              ")\n" + \
              "SELECT a.* FROM (SELECT " + ",".join([self.quote_value(v) + " " + self.quote_column(k) for k, v in new_record.items()]) + " FROM DUAL) a\n" + \
              "LEFT JOIN " + \
              "(SELECT 'dummy' exist FROM " + self.quote_column(table_name) + " WHERE " + condition + " LIMIT 1) b ON 1=1 WHERE exist IS Null"
    self.execute(command, {})
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # possible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any more parameters (which will overwrite default_params)
    :return: never returns; always raises Except
    """
    # CALLED AS error(template, cause): FIRST POSITIONAL MAY BE THE EXCEPTION
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)
    # BUGFIX: removed dead code -- `add_to_trace` was hard-coded False, so the
    # `cause[0].trace.extend(trace[1:])` branch could never execute
    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def _normalize_window(window, schema=None):
    # CONVERT A window CLAUSE INTO ITS CANONICAL Dict FORM; VALUES THAT DO
    # NOT PARSE AS JX EXPRESSIONS FALL BACK TO RAW SCRIPTS
    raw_value = window.value
    try:
        expr = jx_expression(raw_value)
    except Exception:
        expr = ScriptOp("script", raw_value)

    return Dict(
        name=coalesce(window.name, window.value),
        value=expr,
        aggregate=window.aggregate,
        edges=[_normalize_edge(e, schema) for e in listwrap(window.edges)],
        where=_normalize_where(window.where, schema=schema),
        sort=_normalize_sort(window.sort),
        range=_normalize_range(window.range)
    )
def _convert_edge(self, edge):
    # REWRITE AN EDGE EXPRESSED OVER A DIMENSION NAME ONTO ITS UNDERLYING
    # FIELD(S); UNKNOWN DIMENSIONS PASS THROUGH UNCHANGED
    dim = self.dimensions[edge.value]
    if not dim:
        return edge

    if len(listwrap(dim.fields)) == 1:
        # TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
        # BUGFIX: removed unreachable `new_edge.domain = dim.getDomain()` that
        # followed this return; if the domain should be attached to the
        # single-field edge, do it BEFORE returning -- TODO confirm intent
        new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
        return new_edge

    edge = copy(edge)
    edge.value = None
    edge.domain = dim.getDomain()
    return edge
def _edges_op(self, query):
    """
    Translate an edges (GROUP BY) query into a single SQL statement:
    each edge becomes a derived table of its DISTINCT values, LEFT JOINed
    to the data table, so empty groups still appear in the output.
    """
    selects = []
    for s in listwrap(query.select):
        if s.value == "." and s.aggregate == "count":
            selects.append("COUNT(1) AS " + quote_table(s.name))
        else:
            selects.append(sql_aggs[s.aggregate] + "(" + jx_expression(s.value).to_sql() + ") AS " + quote_table(s.name))
    for w in query.window:
        # NOTE(review): `self` is passed explicitly in addition to the bound call —
        # looks like _window_op is a staticmethod-style helper; confirm
        selects.append(self._window_op(self, query, w))

    agg_prefix = " FROM "
    agg_suffix = "\n"
    agg = ""
    ons = []
    groupby = ""
    groupby_prefix = "\nGROUP BY "
    for i, e in enumerate(query.edges):
        edge_alias = "e" + unicode(i)
        edge_value = e.value.to_sql()
        value = edge_value
        # qualify edge variables with the data-table alias "a" for the ON clause
        for v in e.value.vars():
            value = value.replace(quote_table(v), "a." + quote_table(v))
        edge_name = quote_table(e.name)
        selects.append(edge_alias + "." + edge_name + " AS " + edge_name)
        # one DISTINCT-values derived table per edge; first uses FROM, the rest LEFT JOIN
        agg += \
            agg_prefix + "(" + \
            "SELECT DISTINCT " + edge_value + " AS " + edge_name + " FROM " + quote_table(self.name) + \
            ") " + edge_alias + \
            agg_suffix
        agg_prefix = " LEFT JOIN "
        agg_suffix = " ON 1=1\n"
        ons.append(edge_alias + "." + edge_name + " = " + value)
        groupby += groupby_prefix + edge_alias + "." + edge_name
        groupby_prefix = ",\n"

    # finally join the data table itself, matching each row to its edge values
    agg += agg_prefix + quote_table(self.name) + " a ON " + " AND ".join(ons)
    where = "\nWHERE " + query.where.to_sql()
    return "SELECT " + (",\n".join(selects)) + agg + where + groupby
def fatal(
    cls,
    template,            # human readable template
    default_params={},   # parameters for template (read-only; never mutated here)
    cause=None,          # plausible cause  (fix: was typo "pausible")
    stack_depth=0,       # stack trace offset (==1 if you do not want to report self)
    **more_params        # overrides for default_params
):
    """
    SEND TO STDERR
    """
    # an exception passed in the params slot is really the cause
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    # normalize cause to a list of Except
    if cause == None:
        cause = []
    elif isinstance(cause, list):
        pass
    elif isinstance(cause, Except):
        cause = [cause]
    else:
        cause = [Except(ERROR, unicode(cause), trace=extract_tb(stack_depth))]

    trace = extract_stack(1 + stack_depth)
    e = Except(ERROR, template, params, cause, trace)
    # NOTE(review): str_e is rendered but not written out in this block, and
    # cls.error_mode is never restored — confirm against the downstream code
    str_e = unicode(e)

    error_mode = cls.error_mode
    try:
        # error_mode guards against infinite recursion when logging itself fails
        if not error_mode:
            cls.error_mode = True
            Log.note(
                str_e,
                {
                    "error": {
                        "template": template,
                        "params": params,
                        "cause": cause,
                        "trace": trace
                    }
                }
            )
    except Exception:
        # fix: was Py2-only `except Exception, f` with an unused binding;
        # best-effort logging — failures here are deliberately swallowed
        pass
def __str__(self):
    """
    Render this exception: type, expanded template, trace, and causes.
    """
    output = self.type + ": " + self.template + "\n"
    if self.params:
        output = expand_template(output, self.params)

    if self.trace:
        output += indent(format_trace(self.trace))

    if self.cause:
        cause_strings = []
        for c in listwrap(self.cause):
            # best-effort: skip any cause that fails to render
            try:
                cause_strings.append(unicode(c))
            except Exception:
                # fix: was Py2-only `except Exception, e` with unused binding
                pass
        output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

    # fix: the original fell off the end and returned None, which breaks str();
    # __unicode__ (the sibling method) returns its output
    return output
def __unicode__(self):
    """Render this exception: type, expanded template, trace, and causes."""
    result = self.type + ": " + self.template + "\n"
    if self.params:
        result = expand_template(result, self.params)

    if self.trace:
        result += indent(format_trace(self.trace))

    if self.cause:
        rendered = []
        for c in listwrap(self.cause):
            # best-effort: silently skip any cause that fails to render
            with suppress_exception:
                rendered.append(unicode(c))
        result += "caused by\n\t" + "and caused by\n\t".join(rendered)

    return result
def update(self, command):
    """
    EXPECTING command == {"set":term, "clear":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS A JSON EXPRESSION FILTER
    """
    cmd = wrap(command)
    clear_keys = listwrap(cmd["clear"])
    assignments = cmd.set.items()
    matches = jx.get(cmd.where)

    for record in self.data:
        if not matches(record):
            continue
        # clear first, then apply the new values
        for key in clear_keys:
            record[key] = None
        for key, new_value in assignments:
            record[key] = new_value
def is_deep(query):
    """
    Return True when this query must be answered with the "deep" strategy:
    one simple select, multiple edges, and a nested "from" path.
    """
    selects = listwrap(query.select)
    if len(selects) > 1:
        return False
    if aggregates[selects[0].aggregate] not in ("none", "count"):
        return False
    if len(query.edges) <= 1:
        return False
    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT; otherwise BETTER TO USE TERM QUERY
    return len(split_field(query["from"].name)) > 1
def groupby(self, keys, contiguous=False):
    """
    Group rows by `keys`, yielding (group, rows) pairs.

    :param keys: field name(s) to group by
    :param contiguous: True when rows with equal keys are already adjacent,
                       so the sort can be skipped
    :return: generator of (Dict of key values, wrapped list of rows)
    """
    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            data = sorted(self.data, key=get_key)
        else:
            # fix: `data` was left undefined in the contiguous case,
            # causing a NameError inside _output()
            data = self.data

        def _output():
            for g, v in itertools.groupby(data, get_key):
                group = Dict()
                for k, gg in zip(keys, g):
                    group[k] = gg
                yield (group, wrap(list(v)))

        return _output()
    except Exception as e:
        # fix: was Py2-only `except Exception, e` syntax
        Log.error("Problem grouping", e)
def error(
    cls,
    template,            # human readable template
    default_params={},   # parameters for template
    cause=None,          # pausible cause
    stack_depth=0,       # stack trace offset (==1 if you do not want to report self)
    **more_params
):
    """
    raise an exception with a trace for the cause too
    """
    # a BaseException passed in the params slot is really the cause
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    # normalize cause to a list of Except; remember when we must splice
    # our own trace onto a freshly-wrapped raw exception
    extend_cause_trace = False
    if cause == None:
        causes = []
    elif isinstance(cause, list):
        causes = cause
    elif isinstance(cause, Except):
        causes = [cause]
    else:
        extend_cause_trace = True
        if hasattr(cause, "message") and cause.message:
            detail = unicode(cause.message)
        else:
            detail = unicode(cause)
        causes = [Except(ERROR, detail, trace=extract_tb(stack_depth))]

    trace = extract_stack(1 + stack_depth)
    if extend_cause_trace:
        causes[0].trace.extend(trace[1:])
    raise Except(ERROR, template, params, causes, trace)
def post_process(sql):
    """
    Run `sql` and reshape the flattened result columns back into the
    structures the select clause asked for.
    NOTE: `self` and `query` are free variables from the enclosing scope.
    """
    result = self.db.query(sql)
    for s in listwrap(query.select):
        if isinstance(s.value, Mapping):
            # re-assemble "name.key" columns into a dict stored under `name`
            for r in result:
                r[s.name] = {}
                # NOTE(review): iterating the Mapping and unpacking (k, v) assumes
                # the project's dict type yields pairs — confirm; stdlib dicts yield keys only
                for k, v in s.value:
                    r[s.name][k] = r[s.name + "." + k]
                    r[s.name + "." + k] = None
        if isinstance(s.value, list):
            # REWRITE AS TUPLE: collect "name,i" columns into a tuple under `name`
            for r in result:
                r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                for i, ss in enumerate(s.value):
                    r[s.name + "," + str(i)] = None
    expand_json(result)
    return result
def unique_index(data, keys=None, fail_on_dup=True):
    """
    RETURN dict THAT USES KEYS TO INDEX DATA
    ONLY ONE VALUE ALLOWED PER UNIQUE KEY

    :param data: iterable of records to index
    :param keys: field name(s) forming the unique key
    :param fail_on_dup: passed through to UniqueIndex
    :return: the populated UniqueIndex
    """
    o = UniqueIndex(listwrap(keys), fail_on_dup=fail_on_dup)

    for d in data:
        try:
            o.add(d)
        except Exception as e:
            # fix: was Py2-only `except Exception, e`, and contained a stray
            # second `o.add(d)` that re-raised and made this report unreachable
            Log.error(
                "index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
                index=keys,
                key=select([d], keys)[0],
                value1=o[d],
                value2=d,
                cause=e
            )
    # fix: return the index so callers receive it (the name promises a value)
    return o
def is_setop(es, query):
    """
    Decide whether `query` can be run as a simple ES set operation
    (only on the ES 1.4 - 1.7 series).
    """
    supported_versions = ["1.4.", "1.5.", "1.6.", "1.7."]
    if not any(es.cluster.version.startswith(v) for v in supported_versions):
        return False

    selects = listwrap(query.select)
    if not query.edges:
        # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        is_deep = len(split_field(query.frum.name)) > 1
        simple_agg = AND([s.aggregate in ("count", "none") for s in selects])

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION,
        # OR RETURN SINGLE AGGREGATE
        if simple_agg or is_deep:
            return True
    else:
        is_smooth = AND(
            (e.domain.type in ALGEBRAIC and e.domain.interval == "none")
            for e in query.edges
        )
        if is_smooth:
            return True

    return False
def _counting_query(c):
    """Build the ES cardinality aggregation that counts distinct values of column `c`."""
    if not c.nested_path:
        return {"cardinality": {"field": c.es_column}}

    # FIRST ONE IS LONGEST
    deepest_path = listwrap(c.nested_path)[0]
    if c.type in _elasticsearch.ES_NUMERIC_TYPES:
        precision = 10
    else:
        precision = 100
    return {
        "nested": {
            "path": deepest_path
        },
        "aggs": {
            "_nested": {
                "cardinality": {
                    "field": c.es_column,
                    "precision_threshold": precision
                }
            }
        }
    }
def _subquery(self, query, isolate=True, stacked=False):
    """
    Translate `query` into (sql, post_processor); `isolate` wraps the SQL
    as a parenthesized derived table aliased "a".
    """
    # a plain table name needs no translation
    if isinstance(query, basestring):
        return self.db.quote_column(query), None
    # IT WOULD BE SAFER TO WRAP TABLE REFERENCES IN A TYPED OBJECT (Cube, MAYBE?)
    if query.name:
        return self.db.quote_column(query.name), None

    if query.edges:
        # RETURN A CUBE
        sql, post = self._grouped(query, stacked)
    elif listwrap(query.select)[0].aggregate != "none":
        sql, post = self._aggop(query)
    else:
        sql, post = self._setop(query)

    if not isolate:
        return sql, post
    return "(\n" + sql + "\n) a\n", post
def es_aggop(es, mvel, query):
    """
    Answer an aggregate-only query with ES statistical facets:
    one facet per distinct select value, results packed into a Cube.
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # SIMPLE, USE TERMS FACET INSTEAD
        # NOTE(review): called with two args here, but the es_countop defined
        # later in this file takes (es, mvel, query) — likely different modules; confirm
        return es_countop(es, query)

    value2facet = dict()   # ONLY ONE FACET NEEDED PER
    name2facet = dict()    # MAP name TO FACET WITH STATS
    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            else:
                # COMPLICATED value IS PROBABLY A SCRIPT
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": es09.expressions.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    # pull each select's statistic out of its facet
    matricies = {
        s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
def query(self, _query):
    """
    Execute `_query` against ES, dispatching to whichever strategy can
    handle it (aggs, field, or set operation). Raises on anything else.
    """
    try:
        if not self.ready:
            Log.error("Must use with clause for any instance of FromES")

        query = Query(_query, schema=self)

        # TODO: restore column/MVEL validation (a disabled, commented-out
        # get_columns/_MVEL block previously lived here)

        for s in listwrap(query.select):
            if not aggregates1_4[s.aggregate]:
                # fix: message referenced self.select[0] instead of the select
                # being validated (s) — compare the newer query() variant
                Log.error("ES can not aggregate " + s.name + " because '" + s.aggregate + "' is not a recognized aggregate")

        frum = query["from"]
        if isinstance(frum, Query):
            # sub-query: run it first, then query its result
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return qb.run(q2)

        if is_aggsop(self._es, query):
            return es_aggsop(self._es, frum, query)
        if is_fieldop(self._es, query):
            return es_fieldop(self._es, query)
        if is_setop(self._es, query):
            return es_setop(self._es, query)

        Log.error("Can not handle")
    except Exception as e:
        # fix: was Py2-only `except Exception, e` syntax
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            # fielddata blew up; clearing the ES cache may let a retry succeed
            http.post(self._es.cluster.path + "/_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def wrap(cls, e, stack_depth=0):
    """Normalize any error-ish value into an Except (or Null, or a list passed through)."""
    if e == None:
        return Null
    if isinstance(e, (list, Except)):
        return e
    if isinstance(e, Mapping):
        # a dict-like serialized exception: recursively wrap its causes
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)

    # a plain Exception: capture its message and traceback
    if hasattr(e, "message") and e.message:
        description = unicode(e.message)
    else:
        description = unicode(e)
    output = Except(ERROR, description, trace=_extract_traceback(0))
    # +2 = to remove the caller, and it's call to this' Except.wrap()
    output.trace.extend(extract_stack(stack_depth + 2))
    return output
def query(self, _query):
    """
    Execute `_query` against ES, converting through namespaces/typing,
    then dispatching to the first strategy that can handle it.
    """
    try:
        query = QueryOp.wrap(_query, schema=self)

        for n in self.namespaces:
            query = n.convert(query)
        if self.typed:
            query = Typed().convert(query)

        # validate the aggregates before hitting ES
        for s in listwrap(query.select):
            if not aggregates1_4.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            # sub-query: run it first, then query its result
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)

        if is_deepop(self._es, query):
            return es_deepop(self._es, query)
        if is_aggsop(self._es, query):
            return es_aggsop(self._es, frum, query)
        if is_setop(self._es, query):
            return es_setop(self._es, query)
        if es09_setop.is_setop(query):
            return es09_setop.es_setop(self._es, None, query)
        if es09_aggop.is_aggop(query):
            return es09_aggop.es_aggop(self._es, None, query)
        Log.error("Can not handle")
    except Exception as e:
        # fix: was Py2-only `except Exception, e` syntax
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            # fielddata blew up; clearing the ES cache may let a retry succeed
            http.post(self._es.cluster.path + "/_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def __init__(self, **desc):
    """
    Range domain: either built from explicit `partitions`, or generated
    from `min`, `max`, `interval`.
    """
    Domain.__init__(self, **desc)
    self.type = "range"
    self.NULL = Null

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")

        parts = listwrap(self.partitions)
        for i, p in enumerate(parts):
            self.min = Math.min(self.min, p.min)
            self.max = Math.max(self.max, p.max)
            if p.dataIndex != None and p.dataIndex != i:
                Log.error("Expecting `dataIndex` to agree with the order of the parts")
            if p[self.key] == None:
                Log.error(
                    "Expecting all parts to have {{key}} as a property",
                    key=self.key
                )
            p.dataIndex = i

        # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
        for p, q in itertools.product(parts, parts):
            # fix: added the identity guard — without `p is not q`, every
            # non-degenerate partition "overlapped" itself and always errored
            if p is not q and p.min <= q.min and q.min < p.max:
                Log.error("partitions overlap!")
        self.partitions = parts
        return
    elif any([self.min == None, self.max == None, self.interval == None]):
        Log.error("Can not handle missing parameter")

    # generate uniform partitions over [min, max) with the given interval
    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(frange(self.min, self.max, self.interval))
    ])
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    selects = listwrap(query.select)
    es_query = build_es_query(query)

    for s in selects:
        if is_keyword(s.value):
            # plain field: terms facet, filtered to rows where the field exists
            es_query.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {
                    "exists": {
                        "field": s.value
                    }
                }
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            es_query.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es09.util.post(es, es_query, query.limit)

    matricies = {
        s.name: Matrix(value=data.hits.facets[s.name].total)
        for s in selects
    }
    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube