Exemple #1
0
 def __init__(self, **desc):
     """
     Initialise the instance from keyword arguments.

     The slots are reset through `_set_slots_to_null`, the wrapped keyword
     arguments are applied with `set_default`, and `name`, `isFacet` and
     `dimension` are then assigned explicitly.

     :param desc: attribute values; `name`, `type` and `isFacet` are read here
     """
     spec = wrap(desc)
     self._set_slots_to_null(self.__class__)
     set_default(self, spec)
     # `type` is the fallback for `name`; the exact rule is whatever
     # coalesce() implements (presumably "first non-null argument")
     self.name = coalesce(spec.name, spec.type)
     self.isFacet = coalesce(spec.isFacet, False)
     self.dimension = Null
Exemple #2
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in a `_match` terms aggregation and a `_missing`
        aggregation for this edge.

        A `Variable` edge value aggregates on the field directly; any other
        expression is converted with `to_ruby()` and aggregated as a script.

        :param es_query: the query to nest under both aggregations
        :param start: stored on `self.start`
        :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
        """
        self.start = start
        value = self.edge.value

        if isinstance(value, Variable):
            match_agg = {"terms": {"field": value.var, "size": self.domain.limit}}
            # NOTE(review): the Variable itself (not `.var`) is used as the field here
            missing_agg = {"missing": {"field": value}}
        else:
            script = value.to_ruby()
            missing_filter = value.missing().to_esfilter()
            match_agg = {"terms": {"script_field": script, "size": self.domain.limit}}
            missing_agg = {"filter": missing_filter}

        return wrap({"aggs": {
            "_match": set_default(match_agg, es_query),
            "_missing": set_default(missing_agg, es_query),
        }})
Exemple #3
0
def _delayed_imports():
    """
    Run the imports that are postponed until this function is called, bind
    them to module globals, and register the known container types in
    `_containers.type2container`.
    """
    global _ListContainer, _meta, _containers

    from pyLibrary.queries import meta as _meta
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries import containers as _containers

    # touch the new globals; this has no runtime effect
    _ = (_ListContainer, _meta, _containers)

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL
    except Exception:
        # the MySQL backend is optional: register None when it can not be imported
        MySQL = None

    from pyLibrary.queries.jx_usingES import FromES
    from pyLibrary.queries.meta import FromESMetadata

    known_containers = {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None,
        "meta": FromESMetadata
    }
    set_default(_containers.type2container, known_containers)
Exemple #4
0
 def __init__(self, **desc):
     """
     Populate a new instance from keyword arguments.

     Order matters: slots are nulled first, then the wrapped arguments are
     applied via `set_default`, and finally `name`, `isFacet` and
     `dimension` are set explicitly.
     """
     wrapped = wrap(desc)
     self._set_slots_to_null(self.__class__)
     set_default(self, wrapped)

     self.name = coalesce(wrapped.name, wrapped.type)  # `type` is the fallback
     self.isFacet = coalesce(wrapped.isFacet, False)   # False unless given
     self.dimension = Null
Exemple #5
0
def _delayed_imports():
    """
    Run the postponed imports, bind them to module globals and register the
    container classes in `type2container`.
    """
    global type2container, _ListContainer, _Cube, _run, _Query, _Normal

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL as _MySQL
    except Exception:
        # the MySQL backend is optional: register None when it can not be imported
        _MySQL = None

    from pyLibrary.queries.jx_usingES import FromES as _FromES
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries.containers.cube import Cube as _Cube
    from pyLibrary.queries.jx import run as _run
    from pyLibrary.queries.query import QueryOp as _Query
    from pyLibrary.queries.containers.list_usingSQLite import Table_usingSQLite

    registry = {
        "elasticsearch": _FromES,
        "mysql": _MySQL,
        "sqlite": Table_usingSQLite,
        "memory": None
    }
    set_default(type2container, registry)

    # NOTE(review): `_Normal` is declared global but never bound in this
    # function; reading it relies on a module-level definition - confirm
    _ = (_run, _Query, _Normal)
Exemple #6
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in a `filters` aggregation with one filter per domain
        partition.

        Each partition's filter is its `where` AND-ed with the negation of
        every earlier partition's `where`. When the edge allows nulls, a
        `_missing` filter matching none of the partitions is added.

        :param es_query: the query to nest under the aggregations
        :param start: stored on `self.start`
        :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
        """
        self.start = start

        match_filters = []
        excluded = []  # NotOp for every partition handled so far
        for part in self.edge.domain.partitions:
            match_filters.append(AndOp("and", [part.where] + excluded).to_esfilter())
            excluded.append(NotOp("not", part.where))

        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        if self.edge.allowNulls:
            missing_agg = set_default({"filter": AndOp("and", excluded).to_esfilter()}, es_query)
        else:
            missing_agg = None

        match_agg = set_default({"filters": {"filters": match_filters}}, es_query)
        return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
Exemple #7
0
def _range_composer(edge, domain, es_query, to_float):
    """
    Build a `range` aggregation over the domain partitions, plus an optional
    `_missing` filter for values outside the domain or missing.

    :param edge: edge whose `value` is bucketed; `allowNulls` enables `_missing`
    :param domain: supplies `min`, `max` and `partitions`
    :param es_query: the query to nest under the aggregations
    :param to_float: converts partition/domain bounds to numbers
    :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
    """
    value = edge.value

    # NOTE(review): the lower bound falls back to MAX (not a minimum) of the
    # partition minimums - looks unintended, confirm before relying on it
    lower = coalesce(domain.min, MAX(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    if isinstance(value, Variable):
        calc = {"field": value.var}
    else:
        calc = {"script_field": value.to_ruby()}

    missing_filter = None
    if edge.allowNulls:    # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        out_of_range = OrOp("or", [
            BinaryOp("lt", [value, Literal(None, to_float(lower))]),
            BinaryOp("gte", [value, Literal(None, to_float(upper))]),
        ]).to_esfilter()
        missing_filter = set_default(
            {"filter": {"or": [out_of_range, value.missing().to_esfilter()]}},
            es_query
        )

    ranges = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]
    return wrap({"aggs": {
        "_match": set_default({"range": calc}, {"range": {"ranges": ranges}}, es_query),
        "_missing": missing_filter
    }})
Exemple #8
0
    def query(self, _query):
        """
        Run `_query` against the column metadata of the cluster.

        On first use `self.columns` is filled from the index mappings: every
        parsed column is tagged with its index as `cube`, and a copy per alias
        is added for the first index (in sorted order) carrying that alias.

        :param _query: query object; its `as_dict()` supplies the clauses
        :return: result of `qb.run` over `self.columns`
        """
        if not self.columns:
            self.columns = []
            seen_aliases = set()
            es_metadata = self._es.get_metadata()
            ordered_indices = qb.sort(es_metadata.indices.items(), {
                "value": 0,
                "sort": -1
            })
            for index_name, index_meta in ordered_indices:
                for _, mapping in index_meta.mappings.items():
                    columns = _parse_properties(index_name, mapping.properties)
                    for col in columns:
                        col.cube = index_name
                        col.property = col.name
                        col.name = None
                        col.useSource = None
                    self.columns.extend(columns)

                    for alias in index_meta.aliases:
                        # ONLY THE LATEST ALIAS IS CHOSEN TO GET COLUMNS
                        if alias in seen_aliases:
                            continue
                        seen_aliases.add(alias)
                        for col in columns:
                            # a new dict per alias, so the original column is not shared
                            self.columns.append(set_default({"cube": alias}, col))

        meta_query = set_default({
            "from": self.columns,
            "sort": ["cube", "property"]
        }, _query.as_dict())
        return qb.run(meta_query)
Exemple #9
0
def _delayed_imports():
    """
    Import the container implementations at call time, publish them as module
    globals and add them to `type2container`.
    """
    global type2container, _ListContainer, _Cube
    global _run, _Query, _Normal

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL as _MySQL
    except Exception:
        # MySQL support is optional; register None when the import fails
        _MySQL = None

    from pyLibrary.queries.jx_usingES import FromES as _FromES
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries.containers.cube import Cube as _Cube
    from pyLibrary.queries.jx import run as _run
    from pyLibrary.queries.query import QueryOp as _Query
    from pyLibrary.queries.containers.list_usingSQLite import Table_usingSQLite

    container_types = {
        "elasticsearch": _FromES,
        "mysql": _MySQL,
        "sqlite": Table_usingSQLite,
        "memory": None
    }
    set_default(type2container, container_types)

    # NOTE(review): `_Normal` is only read here, never assigned; this depends
    # on a module-level binding - confirm it exists
    _ = (_run, _Query, _Normal)
def _get_branches_from_hg(settings):
    """
    Scrape the page at `settings.url` and index every branch found there.

    The second table of the page is read row by row (each row must hold
    exactly two cells: directory and name). Afterwards extra entries are
    added under alternative names for beta, release and aurora branches.

    :param settings: must provide `url`; also passed to `_get_single_branch_from_hg`
    :return: `UniqueIndex` keyed on (name, locale)
    """
    # GET MAIN PAGE
    page = http.get(settings.url)
    doc = BeautifulSoup(page.all_content)

    repo_table = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for row in repo_table("tr"):
        path, name = [cell.text.strip() for cell in row("td")]
        branches.extend(_get_single_branch_from_hg(settings, name, path.lstrip("/")))

    for beta in list(branches["mozilla-beta", ]):
        # the copy is registered under the l10n "name" ...
        branches.add(set_default({"name": "release-mozilla-beta"}, beta))
        # ... and the original entry is pointed at the releases URL
        beta.url = "https://hg.mozilla.org/releases/mozilla-beta"

    for release in list(branches["mozilla-release", ]):
        branches.add(set_default({"name": "release-mozilla-release"}, release))

    for aurora in list(branches["mozilla-aurora", ]):
        if aurora.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, aurora))

    return branches
Exemple #11
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in `_match` (terms) and `_missing` aggregations.

        For a non-`Variable` edge value the terms are computed by script, and
        `_missing` is only emitted when `value.missing()` is truthy.

        :param es_query: the query to nest under the aggregations
        :param start: stored on `self.start`
        :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
        """
        self.start = start
        value = self.edge.value

        if isinstance(value, Variable):
            match_agg = set_default(
                {"terms": {"field": value.var, "size": self.domain.limit}},
                es_query
            )
            # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_agg = set_default({"missing": {"field": value}}, es_query)
            return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})

        script = value.to_ruby()
        missing = value.missing()
        match_agg = set_default(
            {"terms": {"script_field": script, "size": self.domain.limit}},
            es_query
        )
        if missing:
            missing_agg = set_default({"filter": missing.to_esfilter()}, es_query)
        else:
            missing_agg = None
        return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
Exemple #12
0
def get_branches(settings):
    """
    Scrape the page at `settings.url` and index every branch found there.

    Rows of the page's second table are expected to have exactly two cells
    (directory, name). Beta and release branches are additionally registered
    under `release-*` names, and non en-US aurora branches under
    `comm-aurora`.

    :param settings: must provide `url`; also passed on to `get_branch`
    :return: `UniqueIndex` keyed on (name, locale)
    """
    # GET MAIN PAGE
    page = http.get(settings.url)
    doc = BeautifulSoup(page.all_content)

    repo_table = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for row in repo_table("tr"):
        path, name = [cell.text.strip() for cell in row("td")]
        branches.extend(get_branch(settings, name, path.lstrip("/")))

    release_aliases = (
        ("mozilla-beta", "release-mozilla-beta"),
        ("mozilla-release", "release-mozilla-release"),
    )
    for source, alias in release_aliases:
        # list(): snapshot first, because entries are added while looping
        for b in list(branches[source, ]):
            branches.add(set_default({"name": alias}, b))

    for b in list(branches["mozilla-aurora", ]):
        if b.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, b))
    return branches
Exemple #13
0
def _delayed_imports():
    """
    Import the query modules at call time, expose them as module globals and
    add the container classes to `_containers.type2container`.
    """
    global _ListContainer, _meta, _containers

    from pyLibrary.queries import meta as _meta
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries import containers as _containers

    # reference the globals once; no runtime effect
    _ = (_ListContainer, _meta, _containers)

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL
    except Exception:
        # MySQL support is optional; register None when the import fails
        MySQL = None

    from pyLibrary.queries.jx_usingES import FromES
    from pyLibrary.queries.meta import FromESMetadata

    container_types = {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None,
        "meta": FromESMetadata
    }
    set_default(_containers.type2container, container_types)
Exemple #14
0
def _range_composer(edge, domain, es_query, to_float):
    """
    Compose a `range` aggregation with one bucket per domain partition and,
    when the edge allows nulls, a `_missing` filter for values that are
    below the lower bound, at/above the upper bound, or missing.

    :param edge: edge whose `value` is bucketed
    :param domain: supplies `min`, `max` and `partitions`
    :param es_query: the query to nest under the aggregations
    :param to_float: converts bounds to numbers
    :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
    """
    value = edge.value

    # NOTE(review): MAX of the partition minimums is used as the fallback
    # lower bound - a minimum would be expected here, confirm
    low = coalesce(domain.min, MAX(domain.partitions.min))
    high = coalesce(domain.max, MAX(domain.partitions.max))

    calc = {"field": value.var} if isinstance(value, Variable) else {"script_field": value.to_ruby()}

    if edge.allowNulls:    # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        outside = OrOp("or", [
            InequalityOp("lt", [value, Literal(None, to_float(low))]),
            InequalityOp("gte", [value, Literal(None, to_float(high))]),
        ]).to_esfilter()
        absent = value.missing().to_esfilter()
        missing_filter = set_default({"filter": {"or": [outside, absent]}}, es_query)
    else:
        missing_filter = None

    buckets = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]
    match_agg = set_default({"range": calc}, {"range": {"ranges": buckets}}, es_query)
    return wrap({"aggs": {"_match": match_agg, "_missing": missing_filter}})
Exemple #15
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` under a `filters` aggregation built from the domain
        partitions; earlier partitions take precedence because each filter
        also excludes all partitions before it.

        :param es_query: the query to nest under the aggregations
        :param start: stored on `self.start`
        :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`;
                 `_missing` is None unless the edge allows nulls
        """
        self.start = start

        filters = []
        negated = []
        for partition in self.edge.domain.partitions:
            exclusive = AndOp("and", [partition.where] + negated)
            filters.append(exclusive.to_esfilter())
            negated.append(NotOp("not", partition.where))

        missing_filter = None
        if self.edge.allowNulls:    # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            nothing_matched = AndOp("and", negated).to_esfilter()
            missing_filter = set_default({"filter": nothing_matched}, es_query)

        aggs = {
            "_match": set_default({"filters": {"filters": filters}}, es_query),
            "_missing": missing_filter
        }
        return wrap({"aggs": aggs})
Exemple #16
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` inside one terms aggregation per entry of
        `self.fields` (the last field ends up outermost), optionally adding a
        `_missing` aggregation per level, and finally wrap everything in a
        `_filter` aggregation when the domain has a `where`.

        :param es_query: the innermost query
        :param start: stored on `self.start`
        :return: the nested query
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start

        for field in self.fields:
            terms = {"terms": {"field": field, "size": self.domain.limit}}
            level = wrap({"aggs": {"_match": set_default(terms, es_query)}})
            if self.edge.allowNulls:
                # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
                level.aggs._missing = set_default({"missing": {"field": field}}, es_query)
            es_query = level

        if self.domain.where:
            where_filter = simplify_esfilter(self.domain.where)
            es_query = {"aggs": {"_filter": set_default({"filter": where_filter}, es_query)}}

        return es_query
Exemple #17
0
def _range_composer(edge, domain, es_query, to_float):
    """
    Compose a `range` aggregation with one bucket per domain partition, plus
    a `_missing` filter for values outside the domain or missing.

    :param edge: edge whose `value` (field name or expression) is bucketed
    :param domain: supplies `min`, `max` and `partitions`
    :param es_query: the query to nest under the aggregations
    :param to_float: converts bounds to numbers
    :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
    """
    # USE RANGES
    # NOTE(review): the fallback lower bound is MAX (not a minimum) of the
    # partition minimums - looks unintended, confirm
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if is_keyword(edge.value):
        calc = {"field": edge.value}
        missing_range = {"or": [
            {"range": {edge.value: {"lt": to_float(_min)}}},
            {"range": {edge.value: {"gte": to_float(_max)}}}
        ]}
    else:
        calc = {"script": qb_expression_to_ruby(edge.value)}
        # Range buckets are half-open [from, to), so a value equal to _max is
        # in no bucket; it must count as out-of-range ("gte", as in the
        # keyword branch) or it would be neither matched nor missing.
        missing_range = {"script": {"script": qb_expression_to_ruby({"or": [
            {"lt": [edge.value, to_float(_min)]},
            {"gte": [edge.value, to_float(_max)]},
        ]})}}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": set_default(
            {"filter": {"or": [
                missing_range,
                {"missing": {"field": get_all_vars(edge.value)}}
            ]}},
            es_query
        ),
    }})
Exemple #18
0
    def append_query(self, es_query, start):
        """
        Build the `_match` / `_missing` aggregation pair for this edge around
        `es_query`.

        Script-based edges (value is not a `Variable`) only get a `_missing`
        filter when `value.missing()` is truthy; field-based edges always get
        a `missing` aggregation.

        :param es_query: the query to nest under the aggregations
        :param start: stored on `self.start`
        :return: wrapped `{"aggs": {...}}`
        """
        self.start = start
        value = self.edge.value
        is_script = not isinstance(value, Variable)

        if is_script:
            script = value.to_ruby()
            missing = value.missing()
            match_agg = set_default(
                {"terms": {"script_field": script, "size": self.domain.limit}}, es_query
            )
            missing_agg = set_default({"filter": missing.to_esfilter()}, es_query) if missing else None
        else:
            match_agg = set_default(
                {"terms": {"field": value.var, "size": self.domain.limit}}, es_query
            )
            # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_agg = set_default({"missing": {"field": value}}, es_query)

        return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
Exemple #19
0
def es_setop(es, query):
    """
    Translate a set-operation `query` into an Elasticsearch request and
    return the extracted rows.

    :param es: passed through to `extract_rows`
    :param query: supplies `frum.name`, `where`, `limit` and `sort`
    :return: whatever `extract_rows` returns
    """
    es_query, filters = es14.util.es_query_template(query.frum.name)

    # the query's where clause is applied to the template's first filter
    where_filter = simplify_esfilter(query.where.to_esfilter())
    set_default(filters[0], where_filter)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    return extract_rows(es, es_query, query)
Exemple #20
0
def es_setop(es, query):
    """
    Build the Elasticsearch request for `query` from the standard template
    (filter, size, sort, empty field list) and hand it to `extract_rows`.

    :param es: passed through to `extract_rows`
    :param query: supplies `frum.name`, `where`, `limit` and `sort`
    :return: result of `extract_rows`
    """
    template, filters = es14.util.es_query_template(query.frum.name)
    set_default(filters[0], simplify_esfilter(query.where.to_esfilter()))

    template.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)  # default when no limit given
    template.sort = jx_sort_to_es_sort(query.sort)
    template.fields = DictList()

    return extract_rows(es, template, query)
def _delayed_imports():
    """
    Import the MySQL and Elasticsearch containers at call time and register
    them in the module-level `type2container`.
    """
    global type2container

    from pyLibrary.queries.qb_usingMySQL import MySQL
    from pyLibrary.queries.qb_usingES import FromES

    container_types = {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None
    }
    set_default(type2container, container_types)
Exemple #22
0
def _delayed_imports():
    """
    Load the container implementations on demand and add them to
    `type2container` ("memory" is registered as None).
    """
    global type2container

    from pyLibrary.queries.qb_usingMySQL import MySQL
    from pyLibrary.queries.qb_usingES import FromES

    registry = {"elasticsearch": FromES, "mysql": MySQL, "memory": None}
    set_default(type2container, registry)
Exemple #23
0
 def _convert_clause(self, clause):
     """
     Convert the `value` of a query clause (or of every item in a list of
     clauses) with `self.convert`, keeping the other properties.

     JSON query expressions have many clauses with similar column
     declarations; this handles all of them.

     :param clause: None, a Mapping with "value", or an iterable of clauses
     :return: None, one converted clause, or a list of converted clauses
     """
     # `== None` is kept as-is: presumably so null-like wrapper objects
     # also match - confirm before changing to `is None`
     if clause == None:
         return None

     if isinstance(clause, Mapping):
         return set_default({"value": self.convert(clause["value"])}, clause)

     converted = []
     for c in clause:
         converted.append(set_default({"value": self.convert(c.value)}, c))
     return converted
Exemple #24
0
 def append_query(self, es_query, start):
     """
     Nest `es_query` under a terms aggregation on the edge value (limited to
     the domain's `limit`) and under a matching `missing` aggregation.

     :param es_query: the query to nest
     :param start: stored on `self.start`
     :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
     """
     self.start = start
     field = self.edge.value

     match_agg = set_default(
         {"terms": {"field": field, "size": self.edge.domain.limit}},
         es_query
     )
     missing_agg = set_default({"missing": {"field": field}}, es_query)
     return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
Exemple #25
0
    def _convert_clause(self, clause):
        """
        Convert the `value` of one clause, or of each clause in a list, with
        `self.convert`; the remaining properties are taken from the original.

        Qb queries have many clauses with similar column declarations.

        :param clause: None, a Mapping with `value`, or an iterable of clauses
        :return: None, one converted clause, or a list of converted clauses
        """
        wrapped = wrap(clause)

        # `== None` kept deliberately: presumably wrapped null values
        # compare equal to None without being None - confirm
        if wrapped == None:
            return None

        if isinstance(wrapped, Mapping):
            return set_default({"value": self.convert(wrapped.value)}, wrapped)

        return [
            set_default({"value": self.convert(item.value)}, item)
            for item in wrapped
        ]
Exemple #26
0
def parse_comment(bug, comment):
    """
    Parse one bug comment into a list of subtest records.

    Header lines (`log:`, `buildname`, `repository:`, `machine:`,
    `revision`, `start_time`, `submit_timestamp`) update a copy of `bug`;
    `who:` lines and blank lines are skipped. Every remaining line of the
    form `status | name | message` becomes a subtest that also carries the
    bug properties collected so far. Other lines are logged and ignored.

    :param bug: bug record; copied, never modified
    :param comment: object with `comment` (text) and `modified_ts`
    :return: list of subtest records (empty when no line matched)
    """
    bug = bug.copy()
    subtests = []
    for line in comment.comment.split('\n'):
        if not line.strip():
            continue
        elif line.startswith('log: https://treeherder.mozilla.org'):
            bug.treeherder = line.split('log: ')[1]
            continue
        elif line.startswith('buildname'):
            bug.build.name = line.split('buildname: ')[1]
            continue
        elif line.startswith('repository: '):
            bug.branch.name = line.split('repository: ')[1]
            continue
        elif line.startswith('machine: '):
            bug.machine.name = line.split('machine: ')[1]
            continue
        elif line.startswith('who: '):
            continue
        elif line.startswith('revision'):
            try:
                bug.build.revision = line.split('revision: ')[1]
                continue
            except Exception:
                Log.error("exception splitting bug {{bug_id}} line on 'revision: ', {{line}}", bug_id=bug.id, line=line)
        elif line.startswith('start_time'):
            bug.timestamp = Date(line.split('start_time: ')[1])
            continue
        elif line.startswith('submit_timestamp'):
            bug.timestamp = line.split('submit_timestamp: ')[1]
            continue

        parts = line.split("|")

        if len(parts) == 3:
            try:
                subtest = Dict()
                subtest.subtest = parse_status(parts[0])
                subtest.subtest.name = parts[1].strip()
                subtest.subtest.message = parts[2].strip()
                subtest.subtest.in_ad = any(subtest.subtest.message.find(t)>=0 for t in timeouts)
                set_default(subtest, bug)
                subtest.subtest.comment_line = line
                subtest.subtest.report_ts = Date(comment.modified_ts)
                subtests.append(subtest)
            except Exception:
                # best effort: a line that can not be parsed is reported and skipped
                Log.note("IGNORED LINE {{bug_id}} {{line}}", line=line, bug_id=bug.bug_id)
        else:
            Log.note("IGNORED LINE {{bug_id}} {{line}}", line=line, bug_id=bug.bug_id)

    # the collected records were previously dropped (the function returned None)
    return subtests
Exemple #27
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` inside one terms/missing aggregation pair per entry
        of `self.fields` (pairs; only the second element is used as the
        field), then wrap it in a `_filter` aggregation when the edge domain
        has a `where`.

        :param es_query: the innermost query
        :param start: stored on `self.start`
        :return: the nested query
        """
        self.start = start

        for _name, field in self.fields:
            match_agg = set_default({"terms": {"field": field}}, es_query)
            missing_agg = set_default({"missing": {"field": field}}, es_query)
            es_query = wrap({"aggs": {
                "_match": match_agg,
                "_missing": missing_agg,
            }})

        if self.edge.domain.where:
            where_filter = simplify_esfilter(self.edge.domain.where)
            es_query = {"aggs": {"_filter": set_default({"filter": where_filter}, es_query)}}

        return es_query
Exemple #28
0
 def append_query(self, es_query, start):
     """
     Nest `es_query` under a terms aggregation and a missing aggregation,
     both on the edge value.

     :param es_query: the query to nest
     :param start: stored on `self.start`
     :return: wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
     """
     self.start = start
     field = self.edge.value

     match_agg = set_default({"terms": {"field": field}}, es_query)
     missing_agg = set_default({"missing": {"field": field}}, es_query)
     return wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
Exemple #29
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in one terms aggregation per field (the last field
        becomes the outermost level), with a `_missing` aggregation per level
        when the edge allows nulls, and in a `_filter` aggregation when the
        domain has a `where`.

        :param es_query: the innermost query
        :param start: stored on `self.start`
        :return: the nested query
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start

        nested = es_query
        for field in self.fields:
            terms_agg = set_default({"terms": {"field": field, "size": self.domain.limit}}, nested)
            level = wrap({"aggs": {"_match": terms_agg}})
            if self.edge.allowNulls:
                level.aggs._missing = set_default({"missing": {"field": field}}, nested)
            nested = level

        if self.domain.where:
            where_filter = simplify_esfilter(self.domain.where)
            nested = {"aggs": {"_filter": set_default({"filter": where_filter}, nested)}}

        return nested
def wrap_from(frum, schema=None):
    """
    Turn a `from` clause into something that can be queried.

    - a string names an Elasticsearch index and is opened with the default
      connection settings
    - a Mapping with a registered `type` is handed to that container class
    - a Mapping with a `from` property becomes a `Query`
    - anything else is returned (wrapped) unchanged

    :param frum: string, Mapping, or any other container
    :param schema: passed to `Query` for nested queries
    :return: a container, a `Query`, or the wrapped input
    """
    if not type2container:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        es_settings = set_default(
            {"index": split_field(frum)[0], "name": frum},
            config.default.settings
        )
        es_settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](es_settings)

    if not isinstance(frum, Mapping):
        return frum

    if frum.type and type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): this check repeats the condition above, so the error
        # looks unreachable
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return type2container[frum.type](frum.settings)

    if frum["from"] or isinstance(frum["from"], (list, set)):
        from pyLibrary.queries.query import Query
        return Query(frum, schema=schema)

    return frum
Exemple #31
0
def wrap_from(frum, schema=None):
    """
    Resolve a `from` clause to a queryable object.

    Strings are opened as Elasticsearch indexes using the default settings,
    typed Mappings are dispatched through `type2container`, Mappings with a
    `from` property are turned into a `Query`, and everything else is
    returned as the wrapped input.

    :param frum: the `from` clause
    :param schema: forwarded to `Query`
    :return: a container, a `Query`, or the wrapped input
    """
    if not type2container:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not config.default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        index_settings = {
            "index": split_field(frum)[0],
            "name": frum,
        }
        settings = set_default(index_settings, config.default.settings)
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)

    is_mapping = isinstance(frum, Mapping)

    if is_mapping and frum.type and type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): `frum.type` was just tested truthy, so this error
        # appears unreachable
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return type2container[frum.type](frum.settings)

    if is_mapping and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import Query
        return Query(frum, schema=schema)

    return frum
Exemple #32
0
    def unexpected(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Log an UNEXPECTED problem as a note, without raising.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; an
                               exception passed here is taken as the cause
        :param cause: *Exception* for chaining
        :param stack_depth: *int* added (plus one) to the depth given to Log.note
        :param log_context: *dict* extra key:value pairs
        :param more_params: any more parameters (which will overwrite default_params)
        """
        if isinstance(default_params, BaseException):
            cause, default_params = default_params, {}

        params = dict(unwrap(default_params), **more_params)

        # a foreign exception is converted to Except before chaining
        if cause and not isinstance(cause, Except):
            cause = Except(UNEXPECTED, unicode(cause), trace=_extract_traceback(0))

        # NOTE(review): the trace always starts one frame up; `stack_depth`
        # is not used here - confirm this is intended
        trace = extract_stack(1)
        problem = Except(UNEXPECTED, template, params, cause, trace)
        context = set_default({"context": WARNING}, log_context)
        Log.note(
            "{{error}}",
            error=problem,
            log_context=context,
            stack_depth=stack_depth + 1
        )
Exemple #33
0
    def warning(cls,
                template,
                default_params={},
                cause=None,
                stack_depth=0,
                log_context=None,
                **more_params):
        """
        Log a WARNING as a note, without raising.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; an
                               exception passed here is taken as the cause
        :param cause: *Exception* (or list of them) for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause, default_params = default_params, {}

        # `values` is rejected as a parameter name
        if "values" in more_params:
            Log.error("Can not handle a logging parameter by name `values`")

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        warning_record = Except(exceptions.WARNING, template, params, cause, trace)
        context = set_default({"context": exceptions.WARNING}, log_context)
        Log.note("{{error|unicode}}",
                 error=warning_record,
                 log_context=context,
                 stack_depth=stack_depth + 1)
Exemple #34
0
    def unexpected(cls,
                   template,
                   default_params={},
                   cause=None,
                   stack_depth=0,
                   log_context=None,
                   **more_params):
        """
        Log an UNEXPECTED problem as a note (with WARNING context), without
        raising.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; an
                               exception passed here is taken as the cause
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause, default_params = default_params, {}

        params = dict(unwrap(default_params), **more_params)

        # wrap foreign exceptions so the chain is made of Except only
        if cause and not isinstance(cause, Except):
            cause = Except(exceptions.UNEXPECTED, unicode(cause), trace=exceptions._extract_traceback(0))

        trace = exceptions.extract_stack(1)
        problem = Except(exceptions.UNEXPECTED, template, params, cause, trace)
        context = set_default({"context": exceptions.WARNING}, log_context)
        Log.note("{{error}}", error=problem, log_context=context, stack_depth=stack_depth + 1)
Exemple #35
0
    def alarm(
        cls,
        template,
        default_params={},
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Log `template` as a note framed by two lines of 80 asterisks, with
        every template line prefixed by "** ".

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        banner = "*" * 80
        body = indent(template, prefix="** ").strip()
        framed = banner + "\n" + body + "\n" + banner

        context = set_default({"context": exceptions.ALARM}, log_context)
        Log.note(
            framed,
            default_params=default_params,
            stack_depth=stack_depth + 1,
            log_context=context,
            **more_params
        )
Exemple #36
0
    def unexpected(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Report an UNEXPECTED problem through Log.note instead of raising it.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; when an
                               exception is given here it becomes the cause
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        exception_given_as_params = isinstance(default_params, BaseException)
        if exception_given_as_params:
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        if cause and not isinstance(cause, Except):
            # foreign exception: keep its text and the current traceback
            cause = Except(
                exceptions.UNEXPECTED,
                unicode(cause),
                trace=exceptions._extract_traceback(0)
            )

        trace = exceptions.extract_stack(1)
        record = Except(exceptions.UNEXPECTED, template, params, cause, trace)
        Log.note(
            "{{error}}",
            error=record,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        )
Exemple #37
0
    def alarm(
        cls,
        template,
        default_params={},
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Send `template` to Log.note() framed by a row of 80 asterisks above and
        below; the template text is first run through indent(prefix="** ")
        and stripped.  The log context defaults to exceptions.ALARM.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        banner = "*" * 80
        framed = "\n".join([banner, indent(template, prefix="** ").strip(), banner])
        alarm_context = set_default({"context": exceptions.ALARM}, log_context)

        Log.note(
            framed,
            default_params=default_params,
            stack_depth=stack_depth + 1,
            log_context=alarm_context,
            **more_params
        )
Exemple #38
0
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        Build an Except of type exceptions.WARNING from the template and pass
        it to Log.note() as the `error` parameter.

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; when an
               exception instance is given here it is used as `cause` instead
        :param cause: *Exception* for chaining (one, or a list of them)
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params);
               the name `values` is rejected
        :return:
        """
        # AN EXCEPTION IN THE SECOND POSITION IS TREATED AS THE CAUSE
        if isinstance(default_params, BaseException):
            cause, default_params = default_params, {}

        if "values" in more_params:
            Log.error("Can not handle a logging parameter by name `values`")

        template_params = dict(unwrap(default_params))
        template_params.update(more_params)

        wrapped_causes = []
        for c in listwrap(cause):
            wrapped_causes.append(Except.wrap(c))
        cause = unwraplist(wrapped_causes)

        call_stack = exceptions.extract_stack(stack_depth + 1)
        problem = Except(exceptions.WARNING, template, template_params, cause, call_stack)
        warning_context = set_default({"context": exceptions.WARNING}, log_context)
        Log.note(
            "{{error|unicode}}",
            error=problem,
            log_context=warning_context,
            stack_depth=stack_depth + 1
        )
Exemple #39
0
    def send(self, topic, message):
        """
        Publishes a pulse message to the proper exchange.

        Connects first when there is no connection yet, publishes an envelope
        of `payload` plus `_meta`, then increments self.count.

        :param topic: routing key handed to the Producer
        :param message: message object; must be truthy and provide
               _prepare(), data, routing_key and metadata
        """
        if not message:
            Log.error("Expecting a message")

        message._prepare()

        if not self.connection:
            self.connect()

        settings = self.settings
        exchange = Exchange(settings.exchange, type='topic')
        producer = Producer(
            channel=self.connection,
            exchange=exchange,
            routing_key=topic
        )

        # The message is actually a simple envelope format with a payload and
        # some metadata.
        now = datetime.datetime.now(timezone(settings.broker_timezone))
        envelope_meta = {
            'exchange': settings.exchange,
            'routing_key': message.routing_key,
            'serializer': settings.serializer,
            'sent': time_to_string(now),
            'count': self.count
        }
        envelope = Dict(
            payload=message.data,
            _meta=set_default(envelope_meta, message.metadata)
        )

        producer.publish(jsons.scrub(envelope), serializer=settings.serializer)
        self.count += 1
Exemple #40
0
    def __new__(cls, e=None, query=None, *args, **kwargs):
        """
        Factory: inspect edge `e` (and `query`) to pick a decoder class, and
        return a new instance of that class made with object.__new__().

        The checks are order-dependent; some branches replace e.domain and then
        deliberately fall through to the later checks.

        :param e: the edge; e.allowNulls and (in some branches) e.domain are modified
        :param query: the query; groupby, limit and frum are read from it
        :return: instance of the selected decoder class
        """
        # allowNulls IS FILLED IN WITH True WHEN coalesce() FINDS NO VALUE (presumably "first non-null")
        e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            if query.groupby:
                return object.__new__(DefaultDecoder, e)

            if isinstance(e.value, basestring):
                Log.error("Expecting Variable or Expression, not plain string")

            if isinstance(e.value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(isinstance(t, Variable) for t in e.value.terms):
                    Log.error("Can only handle variables in tuples")

                e.domain = Dict(dimension={"fields": e.value.terms})
                return object.__new__(DimFieldListDecoder, e)
            elif isinstance(e.value, Variable):
                # LOOK UP THE COLUMN NAMED BY THE VARIABLE
                cols = query.frum.get_columns()
                col = cols.filter(lambda c: c.name == e.value.var)[0]
                if not col:
                    return object.__new__(DefaultDecoder, e)
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.partitions != None:
                    # NO return HERE: THE NEW DOMAIN IS EXAMINED BY THE CHECKS BELOW
                    e.domain = SimpleSetDomain(partitions=col.partitions[:limit:])
                else:
                    e.domain = set_default(DefaultDomain(limit=limit), e.domain.as_dict())
                    return object.__new__(DefaultDecoder, e)

            else:
                return object.__new__(DefaultDecoder, e)

        # DISPATCH ON THE (POSSIBLY REPLACED) DOMAIN
        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
        else:
            Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)
Exemple #41
0
def _delayed_imports():
    """
    Perform the imports that are postponed until first use, bind
    _ListContainer at module level, and fill in the default entries of
    type2container via set_default().
    """
    global type2container, _ListContainer

    from pyLibrary.queries.containers.lists import ListContainer as _ListContainer
    _ = _ListContainer  # REFERENCE THE NAME SO THE IMPORT IS NOT FLAGGED AS UNUSED

    from pyLibrary.queries.qb_usingMySQL import MySQL
    from pyLibrary.queries.qb_usingES import FromES
    from pyLibrary.queries.meta import FromESMetadata

    default_containers = {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None,
        "meta": FromESMetadata
    }
    set_default(type2container, default_containers)
Exemple #42
0
    def upsert_column(self, c):
        """
        Add column `c` to self.columns, or merge it into the existing entry
        with the same table and abs_name, and queue the affected column(s) in
        self.todo.

        When `c` is new, the partitions/cardinality/last_updated of every
        "meta.columns" column are reset to None and those columns are queued
        as well.  When `c` already exists, set_default() is applied to the
        existing column and the todo queue is checked for duplicates.

        :param c: column object with `table` and `abs_name` attributes
        """
        # LIST COMPREHENSION (NOT filter()) SO THE RESULT CAN BE TRUTH-TESTED
        # AND INDEXED ON BOTH PYTHON 2 AND PYTHON 3
        existing_columns = [
            r for r in self.columns.data
            if r.table == c.table and r.abs_name == c.abs_name
        ]
        if not existing_columns:
            self.columns.add(c)
            # COMPUTED AFTER THE add() SO A NEW "meta.columns" COLUMN IS INCLUDED
            cols = [r for r in self.columns.data if r.table == "meta.columns"]
            for cc in cols:
                cc.partitions = cc.cardinality = cc.last_updated = None
            self.todo.add(c)
            self.todo.extend(cols)
        else:
            canonical = existing_columns[0]
            set_default(canonical, c)
            self.todo.add(canonical)

            # TEST CONSISTENCY: NO TWO DISTINCT QUEUED COLUMNS MAY SHARE table AND abs_name
            queued = list(self.todo.queue)
            for left, right in product(queued, queued):
                if left.abs_name == right.abs_name and left.table == right.table and left != right:
                    Log.error(
                        "Duplicate column {{table}}.{{name}} found in the todo queue",
                        table=left.table,
                        name=left.abs_name
                    )
Exemple #43
0
def _get_branches_from_hg(settings):
    """
    Read the page at settings.url, load the branches of every repository
    listed in its second table, add renamed copies for the beta, release,
    aurora and esr branches, and validate the result.

    :param settings: must provide `url`; also passed to _get_single_branch_from_hg()
    :return: UniqueIndex of branches keyed by (name, locale)
    """
    # GET MAIN PAGE
    page = BeautifulSoup(http.get(settings.url).all_content)
    repo_table = page("table")[1]

    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)

    def add_alias(alias, source):
        # ADD A BRANCH WITH name=alias, OTHER PROPERTIES DEFAULTED FROM source
        branches.add(set_default({"name": alias}, source))

    for row in repo_table("tr"):
        path, name = [cell.text.strip() for cell in row("td")]
        branches.extend(_get_single_branch_from_hg(settings, name, path.lstrip("/")))

    # SNAPSHOTS (list(...)) ARE ITERATED BECAUSE branches IS MODIFIED IN THE LOOPS
    for branch in list(branches["mozilla-beta", ]):
        add_alias("release-mozilla-beta", branch)  # THIS IS THE l10n "name"
        branch.url = "https://hg.mozilla.org/releases/mozilla-beta"

    for branch in list(branches["mozilla-release", ]):
        add_alias("release-mozilla-release", branch)

    for branch in list(branches["mozilla-aurora", ]):
        if not (branch.locale == "en-US"):
            add_alias("comm-aurora", branch)

    for branch in list(branches):
        if not branch.name.startswith("mozilla-esr"):
            continue
        add_alias("release-" + branch.name, branch)  # THIS IS THE l10n "name"
        branch.url = "https://hg.mozilla.org/releases/" + branch.name

    # CHECKS
    for branch in branches:
        if branch.name != branch.name.lower():
            Log.error("Expecting lowercase name")
        if not branch.locale:
            Log.error("Not expected")
        if not branch.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not branch.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches
Exemple #44
0
 def query(self, _query):
     """
     Run `_query` against self.meta.columns; "from" and "sort" are set here
     and the remaining properties are defaulted from _query.as_dict().
     """
     columns = self.meta.columns
     spec = set_default(
         {"from": columns, "sort": ["table", "name"]},
         _query.as_dict()
     )
     return columns.query(QueryOp(spec))
Exemple #45
0
 def query(self, _query):
     """
     Query the metadata column table.

     A QueryOp is built from {"from": self.meta.columns, "sort": ["table", "name"]}
     with _query.as_dict() given to set_default() as the source of defaults.
     """
     overrides = {
         "from": self.meta.columns,
         "sort": ["table", "name"]
     }
     requested = _query.as_dict()
     query_op = QueryOp(set_default(overrides, requested))
     return self.meta.columns.query(query_op)
Exemple #46
0
    def __new__(cls, e=None, query=None, *args, **kwargs):
        """
        Factory: inspect edge `e` (and `query`) to pick a decoder class, and
        return a new instance of that class made with object.__new__().

        The checks are order-dependent; the SimpleSetDomain branch replaces
        e.domain and then deliberately falls through to the later checks.

        :param e: the edge; e.allowNulls and (in some branches) e.domain are modified
        :param query: the query; groupby, limit and frum are read from it
        :return: instance of the selected decoder class
        """
        if query.groupby:
            # GROUPBY ASSUMES WE IGNORE THE DOMAIN RANGE
            e.allowNulls = False
        else:
            e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            if query.groupby:
                return object.__new__(DefaultDecoder, e)

            if isinstance(e.value, basestring):
                Log.error("Expecting Variable or Expression, not plain string")

            if isinstance(e.value, Variable):
                # LOOK UP THE COLUMN NAMED BY THE VARIABLE
                cols = query.frum.get_columns()
                col = cols.filter(lambda c: c.name == e.value.var)[0]
                if not col:
                    return object.__new__(DefaultDecoder, e)
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.partitions != None:
                    # NO return HERE: THE NEW DOMAIN IS EXAMINED BY THE CHECKS BELOW
                    e.domain = SimpleSetDomain(
                        partitions=col.partitions[:limit:])
                else:
                    e.domain = set_default(DefaultDomain(limit=limit),
                                           e.domain.as_dict())
                    return object.__new__(DefaultDecoder, e)

            else:
                return object.__new__(DefaultDecoder, e)

        # DISPATCH ON THE (POSSIBLY REPLACED) DOMAIN
        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
        else:
            Log.error("domain type of {{type}} is not supported yet",
                      type=e.domain.type)
Exemple #47
0
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise _Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = dot.get_attr(new_value, ref.fragment)

        if DEBUG:
            _Log.note("Replace {{ref}} with {{new_value}}",
                      ref=ref,
                      new_value=new_value)

        if not output:
            output = new_value
        else:
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            _Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def log_loop(settings, synch, queue, bucket, please_stop):
    """
    Drain `queue` in groups of settings.param.size, write each group to
    `bucket` as one JSON document per line, and announce the written key on
    the work queue named by settings.work_queue.

    A failed group is rolled back on `queue` and, unless the queue is
    closed, reported as a warning.  The loop ends when `please_stop` is
    truthy or the queue's groups run out.

    :param settings: provides work_queue, param.size and destination.bucket
    :param synch: provides next_key, advance(), source_key and ping()
    :param queue: source of pulse messages; supports commit()/rollback()
    :param bucket: destination with `name` and write(key, content)
    :param please_stop: checked after every group
    """

    def with_etl(record):
        # DEFAULT THE etl PROPERTY OF A RECORD, DESCRIBING WHERE IT CAME FROM
        source = {
            "name": "pulse.mozilla.org",
            "id": record._meta.count,
            "count": record._meta.count,
            "message_id": record._meta.message_id,
            "sent": Date(record._meta.sent),
        }
        etl = {
            "name": "Pulse block",
            "bucket": settings.destination.bucket,
            "timestamp": Date.now().unix,
            "id": synch.next_key,
            "source": source,
            "type": "aggregation"
        }
        return set_default(record, {"etl": etl})

    with aws.Queue(settings.work_queue) as work_queue:
        for _, group in qb.groupby(queue, size=settings.param.size):
            Log.note(
                "Preparing {{num}} pulse messages to bucket={{bucket}}",
                num=len(group),
                bucket=bucket.name
            )

            first_count = MIN(group.select("_meta.count"))
            full_key = unicode(synch.next_key) + ":" + unicode(first_count)
            try:
                # None HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start
                output = [with_etl(d) for d in group if d != None]
                lines = "\n".join(convert.value2json(d) for d in output)
                bucket.write(full_key, lines)
                synch.advance()
                synch.source_key = MAX(group.select("_meta.count")) + 1

                now = Date.now()
                announcement = {
                    "bucket": bucket.name,
                    "key": full_key,
                    "timestamp": now.unix,
                    "date/time": now.format()
                }
                work_queue.add(announcement)

                synch.ping()
                queue.commit()
                Log.note(
                    "Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}} ",
                    num=len(group),
                    bucket=bucket.name,
                    key=full_key
                )
            except Exception as e:
                queue.rollback()
                if not queue.closed:
                    Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e)

            if please_stop:
                break
Exemple #49
0
 def _get_and_retry(self, url, branch, **kwargs):
     """
     requests 2.5.0 HTTPS IS A LITTLE UNSTABLE

     Call _get_url() with a default "timeout" of self.timeout.seconds.
     Any exception is swallowed and None is returned instead.

     NOTE(review): despite the name, no retry happens in this body; confirm
     whether a retry step is missing.
     """
     options = set_default(kwargs, {"timeout": self.timeout.seconds})
     try:
         response = _get_url(url, branch, **options)
     except Exception:
         # BEST EFFORT: A FAILED REQUEST YIELDS None
         response = None
     return response
def _get_branches_from_hg(settings):
    """
    Collect branch records from the repository listing at settings.url.

    Each row of the second table on the page gives a directory and a name,
    which are handed to _get_single_branch_from_hg().  Renamed copies are
    then added for the beta, release, aurora (all locales except en-US) and
    esr branches, and every branch is validated.

    :param settings: must provide `url`; also passed to _get_single_branch_from_hg()
    :return: UniqueIndex of branches keyed by (name, locale)
    """
    releases_url = "https://hg.mozilla.org/releases/"

    # GET MAIN PAGE
    response = http.get(settings.url)
    soup = BeautifulSoup(response.all_content)
    listing = soup("table")[1]

    found = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for tr in listing("tr"):
        cells = [td.text.strip() for td in tr("td")]
        folder, repo_name = cells
        found.extend(_get_single_branch_from_hg(settings, repo_name, folder.lstrip("/")))

    # EACH LOOP WALKS A COPY, BECAUSE found GROWS WHILE WE ADD THE RENAMED BRANCHES
    beta = list(found["mozilla-beta", ])
    for original in beta:
        renamed = set_default({"name": "release-mozilla-beta"}, original)  # THIS IS THE l10n "name"
        found.add(renamed)
        original.url = releases_url + "mozilla-beta"

    release = list(found["mozilla-release", ])
    for original in release:
        found.add(set_default({"name": "release-mozilla-release"}, original))

    aurora = list(found["mozilla-aurora", ])
    for original in aurora:
        if original.locale == "en-US":
            continue
        found.add(set_default({"name": "comm-aurora"}, original))

    everything = list(found)
    for original in everything:
        if original.name.startswith("mozilla-esr"):
            renamed = set_default({"name": "release-" + original.name}, original)  # THIS IS THE l10n "name"
            found.add(renamed)
            original.url = releases_url + original.name

    # CHECKS
    for candidate in found:
        if candidate.name != candidate.name.lower():
            Log.error("Expecting lowercase name")
        if not candidate.locale:
            Log.error("Not expected")
        if not candidate.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not candidate.etl.timestamp:
            Log.error("Expecting a timestamp")

    return found
Exemple #51
0
    def test_meta(self):
        """
        Fill a container with one document, query it once so its metadata is
        loaded, then query "meta.columns" for that table and expect the single
        column `a` in list, table and cube formats.
        """
        test = wrap({
            "query": {"from": "meta.columns"},
            "data": [{"a": "b"}]
        })

        settings = self.utils.fill_container(test, tjson=False)
        table_name = settings.index

        # WE REQUIRE A QUERY TO FORCE LOADING OF METADATA
        self.utils.send_queries({
            "query": {"from": table_name},
            "expecting_list": {
                "meta": {"format": "list"},
                "data": [{"a": "b"}]
            }
        })

        meta_query = {
            "select": ["name", "table", "type", "nested_path"],
            "from": "meta.columns",
            "where": {"eq": {"table": table_name}}
        }
        as_list = {
            "meta": {"format": "list"},
            "data": [
                {"table": table_name, "name": "a", "type": "string", "nested_path": "."}
            ]
        }
        as_table = {
            "meta": {"format": "table"},
            "header": ["table", "name", "type", "nested_path"],
            "data": [[table_name, "a", "string", "."]]
        }
        rownum_edge = {
            "name": "rownum",
            "domain": {"type": "rownum", "min": 0, "max": 1, "interval": 1}
        }
        as_cube = {
            "meta": {"format": "cube"},
            "edges": [rownum_edge],
            "data": {
                "table": [table_name],
                "name": ["a"],
                "type": ["string"],
                "nested_path": ["."]
            }
        }

        # THE ORIGINAL test IS THE FIRST ARGUMENT; THE REST IS GIVEN AS DEFAULTS
        test = set_default(test, {
            "query": meta_query,
            "expecting_list": as_list,
            "expecting_table": as_table,
            "expecting_cube": as_cube
        })
        self.utils.send_queries(test)
Exemple #52
0
    def get_schema(self, retry=True):
        """
        Return the mapping for self.settings.type.

        Without settings.explore_metadata the mapping is fetched from
        "<path>/_mapping" and returned under the "mappings" key.  Otherwise
        it is taken from the cluster metadata: directly from the named index,
        or (when no distinct alias is configured) merged from every index
        that lists settings.index among its aliases.

        :param retry: when the index is not found, clear the cached cluster
               state and try once more
        :return: the schema
        """
        settings = self.settings

        if not settings.explore_metadata:
            mapping = self.cluster.get(self.path + "/_mapping")
            if not mapping[settings.type]:
                Log.error("{{index}} does not have type {{type}}", settings)
            return wrap({"mappings": mapping[settings.type]})

        indices = self.cluster.get_metadata().indices
        if not settings.alias or settings.alias==settings.index:
            # PARTIALLY DEFINED settings
            candidates = [
                (name, i)
                for name, i in indices.items()
                if settings.index in i.aliases
            ]
            # TODO: MERGE THE mappings OF ALL candidates, DO NOT JUST PICK THE LAST ONE

            # PLACEHOLDER, SO THE index == None CHECK BELOW DOES NOT FIRE ON THIS PATH
            index = "dummy value"
            schema = wrap({"_routing": {}, "properties": {}})
            ordered = jx.sort(candidates, {"value": 0, "sort": -1})
            for _, ind in ordered:
                mapping = ind.mappings[settings.type]
                set_default(schema._routing, mapping._routing)
                schema.properties = _merge_mapping(schema.properties, mapping.properties)
        else:
            # FULLY DEFINED settings
            index = indices[settings.index]
            schema = index.mappings[settings.type]

        if index == None and retry:
            # TRY AGAIN, JUST IN CASE
            self.cluster.cluster_state = None
            return self.get_schema(retry=False)

        # TODO: REMOVE THIS BUG CORRECTION
        if not schema and settings.type == "test_result":
            schema = index.mappings["test_results"]
        # DONE BUG CORRECTION

        if not schema:
            Log.error(
                "ElasticSearch index ({{index}}) does not have type ({{type}})",
                index=settings.index,
                type=settings.type
            )
        return schema
Exemple #53
0
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref, raw_ref, node["$ref"] = URL(node["$ref"]), node["$ref"], None

        # RECURS
        return_value = node
        candidate = {}
        for k, v in node.items():
            new_v = _replace_ref(v, url)
            candidate[k] = new_v
            if new_v is not v:
                return_value = candidate
        if not ref:
            return return_value
        else:
            node = return_value

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            node["$ref"] = ref
            return node

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = new_value[ref.fragment]

        if isinstance(new_value, Mapping):
            return set_default({}, node, new_value)
        elif node.keys() and new_value == None:
            return node
        else:
            return wrap(new_value)

    elif isinstance(node, list):
        candidate = [_replace_ref(n, url) for n in node]
        if all(p[0] is p[1] for p in zip(candidate, node)):
            return node
        return candidate

    return node
Exemple #54
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in a "terms" aggregation ("_match") restricted to the
        keys of the domain partitions, plus, when the edge allows nulls, a
        "_missing" filter aggregation for everything else.

        A Variable edge value aggregates on the field itself; any other
        expression is aggregated through its to_ruby() script.

        :param es_query: the aggregation to nest inside the new one(s)
        :param start: stored on self.start
        :return: wrapped {"aggs": ...} structure
        """
        self.start = start
        domain = self.domain
        field = self.edge.value
        is_variable = isinstance(field, Variable)

        if is_variable:
            key = domain.key
            if isinstance(key, (tuple, list)) and len(key)==1:
                key = key[0]
            include = [p[key] for p in domain.partitions]
        else:
            include = [p[domain.key] for p in domain.partitions]

        allow_nulls = self.edge.allowNulls

        if is_variable:
            terms = {"field": field.var, "size": self.limit, "include": include}
        else:
            terms = {"script_field": field.to_ruby(), "size": self.limit, "include": include}
        aggs = {"_match": set_default({"terms": terms}, es_query)}

        if allow_nulls:
            # MISSING VALUES, AND VALUES OUTSIDE THE PARTITIONS, ARE COUNTED TOGETHER
            is_missing = field.missing().to_esfilter()
            if is_variable:
                not_included = {"not": {"terms": {field.var: include}}}
            else:
                not_included = NotOp("not", InOp("in", [field, Literal("literal", include)])).to_esfilter()
            aggs["_missing"] = set_default(
                {"filter": {"or": [is_missing, not_included]}},
                es_query
            )

        return wrap({"aggs": aggs})
Exemple #55
0
def wrap_from(frum, schema=None):
    """
    Convert a "from" clause into the object to query.

    * a string names a table: "meta.columns" / "meta.tables" give the
      metadata tables, anything else gives a container of the default type
      built from the default settings
    * a mapping with a recognized `type` gives that container, built from
      frum.settings
    * a mapping with a "from" property is wrapped as a QueryOp
    * a list or set gives a _ListContainer named "test_list"
    * anything else is returned (wrapped) as is

    :param frum: the from clause
    :param schema: passed to QueryOp.wrap() for nested queries
    :return: container, query, or the wrapped value
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        default = _containers.config.default
        if not default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        index = frum
        if not frum.startswith("meta."):
            type_ = default.type
            index = join_field(split_field(frum)[:1:])
        elif frum == "meta.columns":
            return _meta.singlton.meta.columns
        elif frum == "meta.tables":
            return _meta.singlton.meta.tables
        else:
            Log.error("{{name}} not a recognized table", name=frum)

        settings = set_default({"index": index, "name": frum}, default.settings)
        settings.type = None
        container_class = _containers.type2container[type_]
        return container_class(settings)

    if isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            # NOTE(review): frum.type WAS JUST TESTED TRUTHY, SO THIS LOOKS UNREACHABLE
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)

    if isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)

    if isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)

    return frum
 def _get_and_retry(self, url, **kwargs):
     """
     requests 2.5.0 HTTPS IS A LITTLE UNSTABLE

     GET `url` with a default "timeout" of self.timeout.seconds.  On failure,
     sleep 5 seconds and try once more with "https://" replaced by "http://".
     When the second attempt also fails, Log.error() is called with both
     exceptions as the cause.
     """
     kwargs = set_default(kwargs, {"timeout": self.timeout.seconds})

     try:
         return http.get(url, **kwargs)
     except Exception as e:
         first_failure = e

     try:
         Thread.sleep(seconds=5)
         plain_url = url.replace("https://", "http://")
         return http.get(plain_url, **kwargs)
     except Exception as second_failure:
         Log.error(
             "Tried {{url}} twice.  Both failed.",
             {"url": url},
             cause=[first_failure, second_failure]
         )
Exemple #57
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` inside one "_match" (terms) and one "_missing"
        aggregation per entry of self.fields, each level wrapping the previous
        one, and finally inside a "_filter" aggregation when the edge domain
        has a `where` clause.

        :param es_query: the aggregation to nest
        :param start: stored on self.start
        :return: the nested query; `es_query` itself when there are no fields
                 and no `where`
        """
        self.start = start

        nested = es_query
        for _, field_name in self.fields:
            matched = set_default({"terms": {"field": field_name}}, nested)
            missing = set_default({"missing": {"field": field_name}}, nested)
            nested = wrap({"aggs": {"_match": matched, "_missing": missing}})

        where = self.edge.domain.where
        if not where:
            return nested

        where_filter = simplify_esfilter(where)
        return {
            "aggs": {
                "_filter": set_default({"filter": where_filter}, nested)
            }
        }
Exemple #58
0
    def _convert_edge(self, edge):
        dim = self.dimensions[edge.value]
        if not dim:
            return edge

        if len(listwrap(dim.fields)) == 1:
            #TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
            new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
            return new_edge
            new_edge.domain = dim.getDomain()

        edge = copy(edge)
        edge.value = None
        edge.domain = dim.getDomain()
        return edge
Exemple #59
0
def _replace_locals(node, doc_path):
    if isinstance(node, Mapping):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        _Log.error(
                            "{{frag|quote}} reaches up past the root document",
                            frag=frag)
                    new_value = dot.get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            new_value = dot.get_attr(doc_path[-1], frag)

        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            return unwrap(set_default(output, new_value))

    elif isinstance(node, list):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node