Example #1
    def get_treeherder_job(self):
        try:
            with Timer("Process Request"):
                args = Dict(**flask.request.args)

                # IS THE branch/revision PENDING?

                result = self.get_markup(unwraplist(args.branch),
                                         unwraplist(args.revision),
                                         unwraplist(args.task_id),
                                         unwraplist(args.buildername),
                                         unwraplist(args.timestamp))

                response_data = convert.unicode2utf8(
                    convert.value2json(result))
                return Response(response_data,
                                status=200,
                                headers={
                                    "access-control-allow-origin": "*",
                                    "content-type": "text/plain"
                                })
        except Exception, e:
            e = Except.wrap(e)
            Log.warning("Could not process", cause=e)
            e = e.as_dict()

            return Response(convert.unicode2utf8(convert.value2json(e)),
                            status=400,
                            headers={
                                "access-control-allow-origin": "*",
                                "content-type": "application/json"
                            })
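Every example on this page passes values through unwraplist(). As a rough mental model only (an assumption, not the library source), the function appears to collapse list values: an empty list becomes None, a one-element list becomes its only element, and anything else comes back unchanged.

def unwraplist_sketch(value):
    # Minimal sketch of the assumed behaviour; the real pyLibrary unwraplist may differ
    # in how it handles wrapped Dict/Null values.
    if isinstance(value, list):
        if len(value) == 0:
            return None
        if len(value) == 1:
            return value[0]
    return value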
Example #2
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Dict(
        meta={"format": "list"},
        data=data
    )
Example #3
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,       # stack trace offset (==1 if you do not want to report self)
        **more_params
    ):
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = extract_stack(stack_depth + 1)

        e = Except(WARNING, template, params, cause, trace)
        Log.note(
            unicode(e),
            {
                "warning": {# REDUNDANT INFO
                    "template": template,
                    "params": params,
                    "cause": cause,
                    "trace": trace
                }
            },
            stack_depth=stack_depth + 1
        )
Example #4
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        add_to_trace = False
        cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
        trace = exceptions.extract_stack(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(exceptions.ERROR, template, params, cause, trace)
        raise e
Example #5
def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        for name, value in d.items():
            agg_type = names.get(name, "undefined")
            this_type = _type_to_name[value.__class__]
            new_type = _merge_type[agg_type][this_type]
            names[name] = new_type

            if this_type == "object":
                _get_schema_from_list([value], columns, prefix + [name], nested_path)
            elif this_type == "nested":
                np = listwrap(nested_path)
                newpath = unwraplist([".".join((np[0], name))] + np)
                _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            table=".",
            name=full_name,
            abs_name=full_name,
            type=t,
            nested_path=nested_path
        )
        columns[column.name] = column
Example #6
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,  # stack trace offset (==1 if you do not want to report self)
        **more_params):
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = extract_stack(stack_depth + 1)

        e = Except(WARNING, template, params, cause, trace)
        Log.note(
            unicode(e),
            {
                "warning": {  # REDUNDANT INFO
                    "template": template,
                    "params": params,
                    "cause": cause,
                    "trace": trace
                }
            },
            stack_depth=stack_depth + 1)
Example #7
def format_table(T, select, query=None):
    data = []
    num_columns = (Math.MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                r[index] = value
            else:
                if r[index] is None:
                    r[index] = Dict()
                r[index][child] = value

        data.append(r)

    header = [None] * num_columns
    for s in select:
        if header[s.put.index]:
            continue
        header[s.put.index] = s.name

    return Dict(
        meta={"format": "table"},
        header=header,
        data=data
    )
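For orientation, a hypothetical select specification (written as plain dicts for readability; the code above expects wrapped Dicts) shows how put.index and put.child drive the table layout: a child of "." fills the whole cell, while any other child builds up a Dict cell one key at a time.

# select = [
#     {"name": "a",   "pull": "a",   "put": {"index": 0, "child": "."}},
#     {"name": "b.x", "pull": "b.x", "put": {"index": 1, "child": "x"}},
#     {"name": "b.y", "pull": "b.y", "put": {"index": 1, "child": "y"}},
# ]
# For a row like {"a": 3, "b": {"x": 1, "y": 2}} the loop would produce
# r == [3, {"x": 1, "y": 2}] and header == ["a", "b.x"].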
Example #8
def format_list(T, select, source):
    data = []
    for row in T:
        r = Dict()
        for s in select:
            if s.value == ".":
                r[s.name] = row[source]
            else:
                if source == "_source":
                    r[s.name] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[s.name] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[s.name] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(meta={"format": "list"}, data=data)
Example #9
    def warning(cls,
                template,
                default_params={},
                cause=None,
                stack_depth=0,
                log_context=None,
                **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")
        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template, params, cause, trace)
        Log.note("{{error|unicode}}",
                 error=e,
                 log_context=set_default({"context": exceptions.WARNING},
                                         log_context),
                 stack_depth=stack_depth + 1)
Example #10
def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        for name, value in d.items():
            agg_type = names.get(name, "undefined")
            this_type = _type_to_name[value.__class__]
            new_type = _merge_type[agg_type][this_type]
            names[name] = new_type

            if this_type == "object":
                _get_schema_from_list([value], columns, prefix + [name],
                                      nested_path)
            elif this_type == "nested":
                np = listwrap(nested_path)
                newpath = unwraplist([".".join((np[0], name))] + np)
                _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(name=full_name,
                        table=".",
                        es_column=full_name,
                        es_index=".",
                        type=t,
                        nested_path=nested_path)
        columns[column.name] = column
Example #11
def _scrub(record, name):
    value = record[name]
    record[name] = None
    if value == "-" or value == "":
        return None
    else:
        return unwraplist(value)
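A quick illustration of the _scrub() above, assuming the record behaves like a plain dict and unwraplist() collapses one-element lists as sketched earlier:

# record = {"referer": "-", "agent": ["Mozilla/5.0"]}
# _scrub(record, "referer")  # returns None; record["referer"] is now None
# _scrub(record, "agent")    # returns "Mozilla/5.0"; record["agent"] is now None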
Example #12
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, e:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Dict(name=edge,
                                value=jx_expression(edge),
                                allowNulls=True,
                                domain=_normalize_domain(domain=e,
                                                         schema=schema))
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(name=e.name,
                                value=jx_expression(e.fields[0]),
                                allowNulls=True,
                                domain=e.getDomain())
                else:
                    return Dict(name=e.name,
                                allowNulls=True,
                                domain=e.getDomain())
        return Dict(name=edge,
                    value=jx_expression(edge),
                    allowNulls=True,
                    domain=_normalize_domain(schema=schema))
Example #14
def list2cube(rows, column_names=None):
    if column_names:
        keys = column_names
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    data = {k: [] for k in keys}
    output = wrap({
        "meta": {"format": "cube"},
        "edges": [
            {
                "name": "rownum",
                "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1}
            }
        ],
        "data": data
    })

    for r in rows:
        for k in keys:
            data[k].append(unwraplist(r[k]))

    return output
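The "cube" layout built above is column-oriented: one list per key, indexed by rownum. A hypothetical call (output shown approximately; wrap() returns a Dict rather than a plain dict):

# rows = [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]
# list2cube(rows)
# ==> {"meta": {"format": "cube"},
#      "edges": [{"name": "rownum",
#                 "domain": {"type": "rownum", "min": 0, "max": 2, "interval": 1}}],
#      "data": {"a": [1, 2], "b": ["x", "y"]}}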
Example #15
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")
        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template, params, cause, trace)
        Log.note(
            "{{error|unicode}}",
            error=e,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        )
Example #18
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        add_to_trace = False
        cause = unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)])
        trace = extract_stack(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(ERROR, template, params, cause, trace)
        raise e
Example #19
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum
    output = DictList()
    for e in wrap(coalesce(query.edges, query.groupby, [])):
        if e.value != None and not isinstance(e.value, NullOp):
            e = e.copy()
            vars_ = e.value.vars()

            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)

            e.value = e.value.map({schema[v].name: schema[v].es_column for v in vars_})
        elif e.range:
            e = e.copy()
            min_ = e.range.min
            max_ = e.range.max
            vars_ = min_.vars() | max_.vars()

            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)

            map_ = {schema[v].name: schema[v].es_column for v in vars_}
            e.range = {
                "min": min_.map(map_),
                "max": max_.map(map_)
            }
        elif e.domain.dimension:
            vars_ = e.domain.dimension.fields
            e.domain.dimension = e.domain.dimension.copy()
            e.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(e.domain.partitions.where):
            vars_ = set()
            for p in e.domain.partitions:
                vars_ |= p.where.vars()

        try:
            depths = set(len(schema[v].nested_path)-1 for v in vars_)
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v]==None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr}} spans tables, can not handle", expr=e.value)
            max_depth = Math.MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception, e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        limit = 0
        output[max_depth].append(AggsDecoder(e, query, limit))

    return output
Example #20
    def to_dict(self):
        return wrap({
            "meta": {
                "format": "list"
            },
            "data": [{k: unwraplist(v)
                      for k, v in row.items()} for row in self.data]
        })
Example #21
def format_cube(T, select, source):
    matricies = {}
    for s in select:
        try:
            if s.value == ".":
                matricies[s.name] = Matrix.wrap(T.select(source))
            elif isinstance(s.value, list):
                matricies[s.name] = Matrix.wrap([tuple(unwraplist(t[source][ss]) for ss in s.value) for t in T])
            else:
                if source == "_source":
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source][s.value]) for t in T])

                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.name)) for t in T])
        except Exception, e:
            Log.error("", e)
Example #22
def format_list(T, select, source):
    data = []
    for row in T:
        r = Dict()
        for s in select:
            if s.value == ".":
                r[s.name] = row[source]
            else:
                if source == "_source":
                    r[s.name] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[s.name] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[s.name] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(
        meta={"format": "list"},
        data=data
    )
Example #23
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list) or (isinstance(query.select.value, basestring) and query.select.value.endswith("*")):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Dict(
        meta={"format": "list"},
        data=data
    )
Example #24
def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(
                    name=full_name,
                    table=".",
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(
                        name=full_name,
                        table=".",
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath, name_to_column)
Example #25
def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(name=full_name,
                                table=".",
                                es_column=full_name,
                                es_index=".",
                                type="undefined",
                                nested_path=nested_path)
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(name=full_name,
                                    table=".",
                                    es_column=full_name,
                                    es_index=".",
                                    type="undefined",
                                    nested_path=nested_path)
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name],
                                          nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name],
                                          newpath, name_to_column)
Example #26
def format_table(T, select, source):
    header = [s.name for s in select]
    map = {s.name: i
           for i, s in enumerate(select)}  # MAP FROM name TO COLUMN INDEX
    data = []
    for row in T:
        r = [None] * len(header)
        for s in select:
            if s.value == ".":
                r[map[s.name]] = row[source]
            else:
                if source == "_source":
                    r[map[s.name]] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[map[s.name]] = unwraplist(row[source][literal_field(
                        s.value)])
                else:
                    r[map[s.name]] = unwraplist(row[source][literal_field(
                        s.name)])
        data.append(r)
    return Dict(meta={"format": "table"}, header=header, data=data)
Example #27
def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r[k]) for k in keys] for r in rows]

    return wrap({"meta": {"format": "table"}, "header": keys, "data": output})
Example #28
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search(
            {
                "fields": listwrap(schema._routing.path),
                "query": {
                    "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()}
                },
                "size": 200000,
            }
        )

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = DictList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append(
                        {
                            "update": {
                                "_id": h._id,
                                "_routing": unwraplist(h.fields[literal_field(schema._routing.path)]),
                            }
                        }
                    )
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8")
            response = self._es.cluster.post(
                self._es.path + "/_bulk", data=content, headers={"Content-Type": "application/json"}
            )
            if response.errors:
                Log.error(
                    "could not update: {{error}}",
                    error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)],
                )
Example #29
def format_table(T, select, source):
    header = [s.name for s in select]
    map = {s.name: i for i, s in enumerate(select)}  # MAP FROM name TO COLUMN INDEX
    data = []
    for row in T:
        r = [None] * len(header)
        for s in select:
            if s.value == ".":
                r[map[s.name]] = row[source]
            else:
                if source == "_source":
                    r[map[s.name]] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(
        meta={"format": "table"},
        header=header,
        data=data
    )
Example #30
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Dict(meta={"format": "list"}, data=data)
Example #31
        def map_edge(e, map_):
            partitions = unwraplist([set_default({"where": p.where.map(map_)}, p) for p in e.domain.partitions])

            domain = copy(e.domain)
            domain.where = e.domain.where.map(map_)
            domain.partitions = partitions

            edge = copy(e)
            edge.value = e.value.map(map_)
            edge.domain = domain
            if e.range:
                edge.range.min = e.range.min.map(map_)
                edge.range.max = e.range.max.map(map_)
            return edge
Example #32
    def get_treeherder_job(self):
        try:
            with Timer("Process Request"):
                args = Dict(**flask.request.args)

                # IS THE branch/revision PENDING?

                result = self.get_markup(
                    unwraplist(args.branch),
                    unwraplist(args.revision),
                    unwraplist(args.task_id),
                    unwraplist(args.buildername),
                    unwraplist(args.timestamp)
                )

                response_data = convert.unicode2utf8(convert.value2json(result))
                return Response(
                    response_data,
                    status=200,
                    headers={
                        "access-control-allow-origin": "*",
                        "content-type": "text/plain"
                    }
                )
        except Exception, e:
            e = Except.wrap(e)
            Log.warning("Could not process", cause=e)
            e = e.as_dict()

            return Response(
                convert.unicode2utf8(convert.value2json(e)),
                status=400,
                headers={
                    "access-control-allow-origin": "*",
                    "content-type": "application/json"
                }
            )
Example #33
    def _convert_edge(self, edge):
        dim = self.dimensions[edge.value]
        if not dim:
            return edge

        if len(listwrap(dim.fields)) == 1:
            #TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
            new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
            return new_edge
            new_edge.domain = dim.getDomain()

        edge = copy(edge)
        edge.value = None
        edge.domain = dim.getDomain()
        return edge
Example #35
def format_cube(T, select, source):
    matricies = {}
    for s in select:
        try:
            if s.value == ".":
                matricies[s.name] = Matrix.wrap(T.select(source))
            elif isinstance(s.value, list):
                matricies[s.name] = Matrix.wrap([
                    tuple(unwraplist(t[source][ss]) for ss in s.value)
                    for t in T
                ])
            else:
                if source == "_source":
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source][s.value]) for t in T])

                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.name)) for t in T])
        except Exception, e:
            Log.error("", e)
Example #36
def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r.get(k)) for k in keys] for r in rows]

    return wrap({
        "meta": {"format": "table"},
        "header": keys,
        "data": output
    })
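The "table" layout, by contrast, is row-oriented: a header list plus one value list per row. A hypothetical call (approximate output; note that keys pass through set(), so column order is not guaranteed):

# rows = [{"a": 1, "b": "x"}, {"a": 2}]
# list2table(rows, column_names=["a", "b"])
# ==> {"meta": {"format": "table"},
#      "header": ["a", "b"],
#      "data": [[1, "x"], [2, None]]}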
Example #37
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        try:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=e,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        except Exception:
            pass
        cls.error_mode = error_mode

        sys.stderr.write(str_e.encode('utf8'))
Example #38
        def map_edge(e, map_):
            partitions = unwraplist([
                set_default({"where": p.where.map(map_)}, p)
                for p in e.domain.partitions
            ])

            domain = copy(e.domain)
            domain.where = e.domain.where.map(map_)
            domain.partitions = partitions

            edge = copy(e)
            edge.value = e.value.map(map_)
            edge.domain = domain
            if e.range:
                edge.range.min = e.range.min.map(map_)
                edge.range.max = e.range.max.map(map_)
            return edge
Example #39
    def wrap(cls, e, stack_depth=0):
        if e == None:
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif isinstance(e, Mapping):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:
            if hasattr(e, "message") and e.message:
                cause = Except(ERROR, unicode(e.message), trace=_extract_traceback(0))
            else:
                cause = Except(ERROR, unicode(e), trace=_extract_traceback(0))

            trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and its call to this Except.wrap()
            cause.trace.extend(trace)
            return cause
Example #40
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        with suppress_exception:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=e,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        cls.error_mode = error_mode

        sys.stderr.write(str_e.encode('utf8'))
Example #41
def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            agg_type = names.get(".", "undefined")
            names["."] = _merge_type[agg_type][row_type]
        else:
            for name, value in d.items():
                agg_type = names.get(name, "undefined")
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[agg_type][this_type]
                names[name] = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            name=full_name,
            table=".",
            es_column=full_name,
            es_index=".",
            type=t,
            nested_path=nested_path
        )
        columns.append(column)
Example #42
    def wrap(cls, e, stack_depth=0):
        if e == None:
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif isinstance(e, Mapping):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:
            if hasattr(e, "message") and e.message:
                cause = Except(ERROR,
                               unicode(e.message),
                               trace=_extract_traceback(0))
            else:
                cause = Except(ERROR, unicode(e), trace=_extract_traceback(0))

            trace = extract_stack(
                stack_depth + 2
            )  # +2 = to remove the caller, and its call to this Except.wrap()
            cause.trace.extend(trace)
            return cause
Example #43
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, e:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Dict(
                        name=edge,
                        value=jx_expression(edge),
                        allowNulls=True,
                        domain=_normalize_domain(domain=e, schema=schema)
                    )
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(
                        name=e.name,
                        value=jx_expression(e.fields[0]),
                        allowNulls=True,
                        domain=e.getDomain()
                    )
                else:
                    return Dict(
                        name=e.name,
                        allowNulls=True,
                        domain=e.getDomain()
                    )
        return Dict(
            name=edge,
            value=jx_expression(edge),
            allowNulls=True,
            domain=_normalize_domain(schema=schema)
        )
Example #44
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        **more_params
    ):
        """
        SEND TO STDERR
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = extract_stack(stack_depth + 1)

        e = Except(ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        try:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error}}",
                    error=e,
                    log_context={"context": WARNING},
                    stack_depth=stack_depth + 1
                )
        except Exception:
            pass
        cls.error_mode = error_mode

        sys.stderr.write(str_e)
Example #45
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = extract_stack(stack_depth + 1)

        e = Except(WARNING, template, params, cause, trace)
        Log.note(
            "{{error|unicode}}",
            error=e,
            log_context=set_default({"context": WARNING}, log_context),
            stack_depth=stack_depth + 1
        )
Example #46
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": listwrap(schema._routing.path),
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": jx_expression(command.where).to_esfilter()
                }
            },
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = DictList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({
                    "script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()
                })

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({
                        "update": {
                            "_id": h._id,
                            "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])
                        }
                    })
                    updates.append(s)
            content = ("\n".join(convert.value2json(c)
                                 for c in updates) + "\n").encode('utf-8')
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"})
            if response.errors:
                Log.error("could not update: {{error}}",
                          error=[
                              e.error for i in response["items"]
                              for e in i.values() if e.status not in (200, 201)
                          ])
Example #47
        def _flatten(d, uid, path, nested_path):
            insertion = doc_collection[
                "." if not nested_path else nested_path[0]]
            row = uid.copy()
            insertion.rows.append(row)
            if isinstance(d, Mapping):
                for k, v in d.items():
                    cname = join_field(split_field(path) + [k])
                    ctype = get_type(v)
                    if ctype is None:
                        continue

                    c = unwraplist([
                        c for c in self.columns.get(cname, Null)
                        if c.type == ctype
                    ])
                    if not c:
                        c = Column(name=cname,
                                   table=self.name,
                                   type=ctype,
                                   es_column=typed_column(cname, ctype),
                                   es_index=self.name,
                                   nested_path=nested_path)
                        self.add_column(c)
                    insertion.active_columns.add(c)

                    if ctype == "nested":
                        row[cname] = "."
                        deeper = [cname] + listwrap(nested_path)
                        insertion = doc_collection.get(cname, None)
                        if not insertion:
                            doc_collection[cname] = Dict(active_columns=set(),
                                                         rows=[])
                        for i, r in enumerate(v):
                            child_uid = set_default(
                                {UID_PREFIX + "id" + unicode(len(uid)): i},
                                uid)
                            _flatten(r, child_uid, cname, deeper)
                    elif ctype == "object":
                        row[cname] = "."
                        _flatten(v, uid, cname, nested_path)  # assumed fix: pass the current uid for the plain sub-object
                    elif c.type:
                        row[cname] = v
            else:
                k = "."
                v = d
                cname = join_field(split_field(path) + [k])
                ctype = get_type(v)
                if ctype is None:
                    return

                c = unwraplist([
                    c for c in self.columns.get(cname, Null) if c.type == ctype
                ])
                if not c:
                    c = Column(name=cname,
                               table=self.name,
                               type=ctype,
                               es_column=typed_column(cname, ctype),
                               es_index=self.name,
                               nested_path=nested_path)
                    self.add_column(c)
                insertion.active_columns.add(c)

                if ctype == "nested":
                    row[cname] = "."
                    deeper = [cname] + listwrap(nested_path)
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        doc_collection[cname] = Dict(active_columns=set(),
                                                     rows=[])
                    for i, r in enumerate(v):
                        child_uid = set_default(
                            {UID_PREFIX + "id" + unicode(len(uid)): i}, uid)
                        _flatten(r, child_uid, cname, deeper)
                elif ctype == "object":
                    row[cname] = "."
                    _flatten(v, uid, cname, nested_path)  # assumed fix: pass the current uid for the plain sub-object
                elif c.type:
                    row[cname] = v
Example #48
def parse_properties(parent_index_name, parent_query_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    from pyLibrary.queries.meta import Column

    columns = DictList()
    for name, property in esProperties.items():
        if parent_query_path:
            index_name, query_path = parent_index_name, join_field(split_field(parent_query_path) + [name])
        else:
            index_name, query_path = parent_index_name, name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            self_columns = parse_properties(index_name, query_path, property.properties)
            for c in self_columns:
                c.nested_path = unwraplist([query_path] + listwrap(c.nested_path))
            columns.extend(self_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="nested",
                nested_path=query_path
            ))

            continue

        if property.properties:
            child_columns = parse_properties(index_name, query_path, property.properties)
            columns.extend(child_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled == False else "object"
            ))

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            for i, (n, p) in enumerate(property.fields.items()):
                if n == name:
                    # DEFAULT
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path,
                        es_column=query_path,
                        type=p.type
                    ))
                else:
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path + "\\." + n,
                        es_column=query_path + "\\." + n,
                        type=p.type
                    ))
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type=property.type
            ))
            if property.index_name and name != property.index_name:
                columns.append(Column(
                    table=index_name,
                    es_index=index_name,
                    es_column=query_path,
                    name=query_path,
                    type=property.type
                ))
        elif property.enabled == None or property.enabled == False:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled==False else "object"
            ))
        else:
            Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)

    return columns
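A hypothetical mapping fragment (names invented for illustration; the real esProperties arrives as a wrapped Dict) and the columns parse_properties() would emit for it:

# es_properties = {
#     "build": {"properties": {"date": {"type": "long"}}},
#     "run": {"type": "nested", "properties": {"name": {"type": "string"}}},
# }
# parse_properties("task", None, es_properties) would yield columns for
# "build" (object), "build.date" (long), "run" (nested, nested_path "run")
# and "run.name" (string, nested_path "run").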
Example #49
    def to_dict(self):
        return wrap({
            "meta": {"format": "list"},
            "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
        })
Example #50
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns}

    es_query = Dict()
    new_select = Dict()  #MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISTICAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error('Not expecting ES to have a value at "." to which {{agg}} can be applied', agg=s.aggregate)
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

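    # FOR EACH GROUP OF SELECTS ON THE SAME FIELD, EMIT THE REQUIRED ES AGGREGATION
    # CLAUSES AND RECORD A pull PATH ON EACH SELECT FOR READING THE RESULT BACK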
    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

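    # FORMULA SELECTS ARE AGGREGATED WITH SCRIPTS; abs_value.to_ruby() RENDERS THE
    # EXPRESSION AS THE SCRIPT BODY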
    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\.0"
            }
        elif s.aggregate=="union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

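    # DECODERS ARE GROUPED BY NESTING DEPTH: DEPTH-1 DECODERS ARE APPLIED INSIDE THE
    # nested AGGREGATION BELOW, DEPTH-0 DECODERS AT THE TOP LEVEL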
    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {
                    "nested": {
                        "path": frum.query_path
                    }
                },
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter_ = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
Example #51
0
    def _load_all_in_push(self, revision, locale=None):
        # http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c
        found_revision = copy(revision)
        if isinstance(found_revision.branch, basestring):
            lower_name = found_revision.branch.lower()
        else:
            lower_name = found_revision.branch.name.lower()

        if not lower_name:
            Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

        b = found_revision.branch = self.branches[(lower_name, locale)]
        if not b:
            b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
            if not b:
                Log.error("can not find branch ({{branch}}, {{locale}})", name=lower_name, locale=locale)
        if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
            self.branches = _hg_branches.get_branches(use_cache=True, settings=self.settings)

        url = found_revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + found_revision.changeset.id
        Log.note(
            "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}}): {{url}}",
            branch=found_revision.branch.name,
            locale=locale,
            changeset=found_revision.changeset.id,
            url=url
        )

        try:
            data = self._get_and_retry(url, found_revision.branch)

            revs = []
            output = None
            for index, _push in data.items():
                push = Push(id=int(index), date=_push.date, user=_push.user)

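                # FETCH CHANGESET DETAILS IN BATCHES OF 200 NODES, PRESUMABLY TO KEEP
                # THE json-info URL TO A MANAGEABLE LENGTH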
                for _, ids in qb.groupby(_push.changesets.node, size=200):
                    url_param = "&".join("node=" + c[0:12] for c in ids)

                    url = found_revision.branch.url.rstrip("/") + "/json-info?" + url_param
                    Log.note("Reading details from {{url}}", {"url": url})

                    raw_revs = self._get_and_retry(url, found_revision.branch)
                    for r in raw_revs.values():
                        rev = Revision(
                            branch=found_revision.branch,
                            index=r.rev,
                            changeset=Changeset(
                                id=r.node,
                                id12=r.node[0:12],
                                author=r.user,
                                description=r.description,
                                date=Date(r.date),
                                files=r.files
                            ),
                            parents=unwraplist(r.parents),
                            children=unwraplist(r.children),
                            push=push,
                            etl={"timestamp": Date.now().unix}
                        )
                        if r.node == found_revision.changeset.id:
                            output = rev
                        if r.node[0:12] == found_revision.changeset.id[0:12]:
                            output = rev
                        _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
                        revs.append({"id": _id, "value": rev})
            self.es.extend(revs)
            return output
        except Exception, e:
            Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)