Example #1
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs=kwargs)
        Log.error("problem getting branches", cause=e)
Example #2
    def append_query(self, es_query, start):
        self.start = start

        parts = self.edge.domain.partitions
        filters = []
        notty = []

        for p in parts:
            w = p.where
            filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema))
            notty.append(NotOp("not", w))

        missing_filter = None
        if self.edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_filter = set_default(
                {"filter": AndOp("and", notty).to_esfilter(self.schema)},
                es_query
            )

        return wrap({"aggs": {
            "_match": set_default(
                {"filters": {"filters": filters}},
                es_query
            ),
            "_missing": missing_filter
        }})
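The aggregation above leans on set_default()'s deep merge: the new bucketing clause is laid over the caller's inner es_query. A minimal sketch of that merge, assuming mo_dots semantics and hypothetical field names:

from mo_dots import set_default

es_query = {"aggs": {"count": {"value_count": {"field": "_id"}}}}
bucketed = set_default({"filters": {"filters": [{"term": {"status": "ok"}}]}}, es_query)
# the literal dict is mutated and returned; the missing "aggs" key is filled
# from es_query, so the bucket clause and the inner aggregations travel together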
Example #3
def _delayed_imports():
    global _ListContainer
    global _meta
    global _containers


    from pyLibrary.queries import meta as _meta
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries import containers as _containers

    _ = _ListContainer
    _ = _meta
    _ = _containers

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL
    except Exception:
        MySQL = None

    from pyLibrary.queries.jx_usingES import FromES
    from pyLibrary.queries.meta import FromESMetadata

    set_default(_containers.type2container, {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None,
        "meta": FromESMetadata
    })
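Here set_default() acts as a register-if-absent pattern: entries already present in type2container are not overwritten. A small sketch under that assumption, with hypothetical handler names:

from mo_dots import set_default

type2container = {"memory": "ListContainer"}   # pre-existing entry wins
set_default(type2container, {"memory": "SomethingElse", "mysql": "MySQL"})
assert type2container == {"memory": "ListContainer", "mysql": "MySQL"}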
Example #4
    def _upsert_column(self, c):
        # ASSUMING self.meta.columns.locker IS HELD
        existing_columns = self.meta.columns.find(c.es_index, c.names["."])
        if not existing_columns:
            self.meta.columns.add(c)
            self.todo.add(c)

            if ENABLE_META_SCAN:
                if DEBUG:
                    Log.note("todo: {{table}}::{{column}}", table=c.es_index, column=c.es_column)
                # MARK meta.columns AS DIRTY TOO
                cols = self.meta.columns.find("meta.columns", None)
                for cc in cols:
                    cc.partitions = cc.cardinality = None
                    cc.last_updated = Date.now()
                self.todo.extend(cols)
        else:
            canonical = existing_columns[0]
            if canonical is not c:
                set_default(c.names, canonical.names)
                for key in Column.__slots__:
                    canonical[key] = c[key]
            if DEBUG:
                Log.note("todo: {{table}}::{{column}}", table=canonical.es_index, column=canonical.es_column)
            self.todo.add(canonical)
Example #5
def _range_composer(edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if edge.allowNulls:
        missing_filter = set_default(
            {
                "filter": NotOp("not", AndOp("and", [
                    edge.value.exists(),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
                ]).partial_eval()).to_esfilter(schema)
            },
            es_query
        )
    else:
        missing_filter = None

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_es_script(schema).script(schema)}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})
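Both branches above are built by layering dicts left to right. A hedged sketch of the three-way merge that assembles the range aggregation, with a hypothetical column name:

from mo_dots import set_default

calc = {"range": {"field": "bytes"}}
ranges = {"range": {"ranges": [{"from": 0, "to": 100}]}}
inner = {"aggs": {"count": {"value_count": {"field": "_id"}}}}
agg = set_default(calc, ranges, inner)
# the nested "range" dicts are merged key by key, so agg ends up holding the
# field, the ranges, and the inner aggregations in one clause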
Example #6
def es_setop(es, query):
    es_query, filters = es14.util.es_query_template(query.frum.name)
    set_default(filters[0], simplify_esfilter(query.where.to_esfilter()))
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = FlatList()

    return extract_rows(es, es_query, query)
Example #7
    def __init__(self, **desc):
        desc = wrap(desc)
        self._set_slots_to_null(self.__class__)
        set_default(self, desc)
        self.name = coalesce(desc.name, desc.type)
        self.isFacet = coalesce(desc.isFacet, False)
        self.dimension = Null
        self.limit = desc.limit
Example #8
    def _convert_clause(self, clause):
        """
        JSON QUERY EXPRESSIONS HAVE MANY CLAUSES WITH SIMILAR COLUMN DECLARATIONS
        """
        if clause == None:
            return None
        elif isinstance(clause, Mapping):
            return set_default({"value": self.convert(clause["value"])}, clause)
        else:
            return [set_default({"value": self.convert(c.value)}, c) for c in clause]
Example #9
    def append_query(self, es_query, start):
        self.start = start

        if not isinstance(self.edge.value, Variable):
            if self.exists is TRUE:
                # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
                output = wrap({"aggs": {
                    "_match": set_default(
                        {"terms": {
                            "script": self.script.expr,
                            "size": self.domain.limit,
                            "order": self.es_order
                        }},
                        es_query
                    )
                }})
            else:
                output = wrap({"aggs": {
                    "_match": {  # _match AND _filter REVERSED SO _match LINES UP WITH _missing
                        "filter": self.exists.to_esfilter(self.schema),
                        "aggs": {
                            "_filter": set_default(
                                {"terms": {
                                    "script": self.script.expr,
                                    "size": self.domain.limit,
                                    "order": self.es_order
                                }},
                                es_query
                            )
                        }
                    },
                    "_missing": set_default(
                        {"filter": self.missing.to_esfilter(self.schema)},
                        es_query
                    )
                }})
            return output
        else:
            output = wrap({"aggs": {
                "_match": set_default(
                    {"terms": {
                        "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                        "size": self.domain.limit,
                        "order": self.es_order
                    }},
                    es_query
                ),
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output
Example #10
    def _convert_clause(self, clause):
        """
        JSON QUERY EXPRESSIONS HAVE MANY CLAUSES WITH SIMILAR COLUMN DECLARATIONS
        """
        clause = wrap(clause)

        if clause == None:
            return None
        elif is_data(clause):
            return set_default({"value": self.convert(clause.value)}, clause)
        else:
            return [set_default({"value": self.convert(c.value)}, c) for c in clause]
Example #11
    def append_query(self, es_query, start):
        self.start = start
        domain = self.domain

        domain_key = domain.key
        include, text_include = transpose(*(
            (
                float(v) if isinstance(v, (int, float)) else v,
                text_type(float(v)) if isinstance(v, (int, float)) else v
            )
            for v in (p[domain_key] for p in domain.partitions)
        ))
        value = self.edge.value
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": {
                    "lang": "painless",
                    "inline": value.to_es_script(self.schema).script(self.schema)
                },
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
        else:
            missing = None

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})
Example #12
    def append_query(self, es_query, start):
        self.start = start
        for i, v in enumerate(self.fields):
            nest = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": v,
                    "size": self.domain.limit
                }}, es_query),
                "_missing": set_default(
                    {"filter": {"missing": {"field": v}}},
                    es_query
                )
            }})
            es_query = nest
        return es_query
Example #13
    def __init__(self, name, params, cwd=None, env=None, debug=False, shell=False, bufsize=-1):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + strings.quote(name))
        self.stdin = Queue("stdin for process " + strings.quote(name), silent=True)
        self.stdout = Queue("stdout for process " + strings.quote(name), silent=True)
        self.stderr = Queue("stderr for process " + strings.quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=bufsize,
                cwd=cwd if isinstance(cwd, (basestring, NullType, NoneType)) else cwd.abspath,
                env=unwrap(set_default(env, os.environ)),
                shell=shell
            )

            self.please_stop = Signal()
            self.please_stop.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, "stdout", service.stdout, self.stdout, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stderr", self._reader, "stderr", service.stderr, self.stderr, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            ]
        except Exception as e:
            Log.error("Can not call", e)

        if self.debug:
            Log.note("{{process}} START: {{command}}", process=self.name, command=" ".join(map(strings.quote, params)))
Example #14
    def alarm(
        cls,
        template,
        default_params={},
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        # USE replace() AS POOR MAN'S CHILD TEMPLATE

        template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80)
        Log.note(
            template,
            default_params=default_params,
            stack_depth=stack_depth + 1,
            log_context=set_default({"context": exceptions.ALARM}, log_context),
            **more_params
        )
Example #15
    def send(self, topic, message):
        """Publishes a pulse message to the proper exchange."""

        if not message:
            Log.error("Expecting a message")

        message._prepare()

        if not self.connection:
            self.connect()

        producer = Producer(
            channel=self.connection,
            exchange=Exchange(self.settings.exchange, type='topic'),
            routing_key=topic
        )

        # The message is actually a simple envelope format with a payload and
        # some metadata.
        final_data = Data(
            payload=message.data,
            _meta=set_default({
                'exchange': self.settings.exchange,
                'routing_key': message.routing_key,
                'serializer': self.settings.serializer,
                'sent': time_to_string(datetime.datetime.now(timezone(self.settings.broker_timezone))),
                'count': self.count
            }, message.metadata)
        )

        producer.publish(jsons.scrub(final_data), serializer=self.settings.serializer)
        self.count += 1
Example #16
    def unexpected(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        if cause and not isinstance(cause, Except):
            cause = Except(exceptions.UNEXPECTED, text_type(cause), trace=exceptions._extract_traceback(0))

        trace = exceptions.extract_stack(1)
        e = Except(type=exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace)
        Log.note(
            "{{error}}",
            error=e,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        )
Example #17
def new_instance(settings):
    """
    MAKE A PYTHON INSTANCE

    `settings` HAS ALL THE `kwargs`, PLUS `class` ATTRIBUTE TO INDICATE THE CLASS TO CREATE
    """
    settings = set_default({}, settings)
    if not settings["class"]:
        Log.error("Expecting 'class' attribute with fully qualified class name")

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], 0)
        constructor = object.__getattribute__(temp, class_name)
    except Exception as e:
        Log.error("Can not find class {{class}}", {"class": path}, cause=e)

    settings['class'] = None
    try:
        return constructor(kwargs=settings)  # MAYBE IT TAKES A KWARGS OBJECT
    except Exception:
        pass

    try:
        return constructor(**settings)
    except Exception as e:
        Log.error("Can not create instance of {{name}}", name=path + "." + class_name, cause=e)
Example #18
    def map_to_es(self):
        """
        RETURN A MAP FROM THE NAMESPACE TO THE es_column NAME
        """
        output = {}
        for path in self.query_path:
            set_default(
                output,
                {
                    k: c.es_column
                    for c in self.snowflake.columns
                    if c.jx_type not in STRUCT
                    for rel_name in [c.names[path]]
                    for k in [rel_name, untype_path(rel_name), unnest_path(rel_name)]
                }
            )
        return output
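Inside the loop, set_default() works as an accumulator: each query path only contributes mappings for names that earlier paths did not already claim. A small sketch of that behavior, assuming mo_dots semantics:

from mo_dots import set_default

output = {}
set_default(output, {"a": "col_a"})
set_default(output, {"a": "other", "b": "col_b"})   # "a" is already taken
assert output == {"a": "col_a", "b": "col_b"}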
Example #19
    def query(self, _query):
        return self.meta.columns.query(QueryOp(set_default(
            {
                "from": self.meta.columns,
                "sort": ["table", "name"]
            },
            _query.__data__()
        )))
Example #20
def es_query_proto(path, selects, wheres, schema):
    """
    RETURN THE NESTED ES QUERY
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :param selects: MAP FROM path TO THE SELECT CLAUSE FOR THAT PATH
    :param wheres: MAP FROM path TO LIST OF WHERE CONDITIONS
    :param schema: SCHEMA USED TO TRANSLATE THE EXPRESSIONS
    :return: es_query
    """
    output = None
    last_where = MATCH_ALL
    for p in reversed(sorted(wheres.keys() | set(selects.keys()))):
        where = wheres.get(p)
        select = selects.get(p)

        if where:
            where = AndOp(where).partial_eval().to_esfilter(schema)
            if output:
                where = es_or([es_and([output, where]), where])
        else:
            if output:
                if last_where is MATCH_ALL:
                    where = es_or([output, MATCH_ALL])
                else:
                    where = output
            else:
                where = MATCH_ALL

        if p == ".":
            output = set_default(
                {
                    "from": 0,
                    "size": 0,
                    "sort": [],
                    "query": where
                },
                select.to_es()
            )
        else:
            output = {"nested": {
                "path": p,
                "inner_hits": set_default({"size": 100000}, select.to_es()) if select else None,
                "query": where
            }}

        last_where = where
    return output
Example #21
def _get_branches_from_hg(kwarg):
    # GET MAIN PAGE
    response = http.get(kwarg.url)
    doc = BeautifulSoup(response.all_content, "html.parser")

    all_repos = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for i, r in enumerate(all_repos("tr")):
        dir, name = [v.text.strip() for v in r("td")]

        b = _get_single_branch_from_hg(kwarg, name, dir.lstrip("/"))
        branches.extend(b)

    # branches.add(set_default({"name": "release-mozilla-beta"}, branches["mozilla-beta", DEFAULT_LOCALE]))
    for b in list(branches["mozilla-beta", ]):
        branches.add(set_default({"name": "release-mozilla-beta"}, b))  # THIS IS THE l10n "name"
        b.url = "https://hg.mozilla.org/releases/mozilla-beta"          # THIS IS THE

    for b in list(branches["mozilla-release", ]):
        branches.add(set_default({"name": "release-mozilla-release"}, b))

    for b in list(branches["mozilla-aurora", ]):
        if b.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, b))
        # b.url = "https://hg.mozilla.org/releases/mozilla-aurora"

    for b in list(branches):
        if b.name.startswith("mozilla-esr"):
            branches.add(set_default({"name": "release-" + b.name}, b))  # THIS IS THE l10n "name"
            b.url = "https://hg.mozilla.org/releases/" + b.name

    #CHECKS
    for b in branches:
        if b.name != b.name.lower():
            Log.error("Expecting lowercase name")
        if not b.locale:
            Log.error("Not expected")
        if not b.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not b.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches
Example #22
def _range_composer(edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if edge.allowNulls:
        missing_filter = set_default(
            {
                "filter":
                NotOp(
                    "not",
                    AndOp("and", [
                        edge.value.exists(),
                        InequalityOp(
                            "gte", [edge.value,
                                    Literal(None, to_float(_min))]),
                        InequalityOp(
                            "lt", [edge.value,
                                   Literal(None, to_float(_max))])
                    ]).partial_eval()).to_esfilter(schema)
            }, es_query)
    else:
        missing_filter = None

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_painless(schema).script(schema)}

    return wrap({
        "aggs": {
            "_match":
            set_default({"range": calc}, {
                "range": {
                    "ranges": [{
                        "from": to_float(p.min),
                        "to": to_float(p.max)
                    } for p in domain.partitions]
                }
            }, es_query),
            "_missing":
            missing_filter
        }
    })
Example #23
    def __init__(
            self,
            hg=None,  # CONNECT TO hg
            repo=None,  # CONNECTION INFO FOR ES CACHE
            branches=None,  # CONNECTION INFO FOR ES CACHE
            use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
            timeout=30 * SECOND,
            kwargs=None):
        if not _hg_branches:
            _late_imports()

        self.es_locker = Lock()
        self.todo = mo_threads.Queue("todo for hg daemon",
                                     max=DAEMON_QUEUE_SIZE)

        self.settings = kwargs
        self.timeout = Duration(timeout)

        # VERIFY CONNECTIVITY
        with Explanation("Test connect with hg"):
            response = http.head(self.settings.hg.url)

        if branches == None:
            self.branches = _hg_branches.get_branches(kwargs=kwargs)
            self.es = None
            return

        self.last_cache_miss = Date.now()

        set_default(repo, {"schema": revision_schema})
        self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(
            kwargs=repo)

        def setup_es(please_stop):
            with suppress_exception:
                self.es.add_alias()

            with suppress_exception:
                self.es.set_refresh_interval(seconds=1)

        Thread.run("setup_es", setup_es)
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.timeout = timeout
        Thread.run("hg daemon", self._daemon)
Example #24
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
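When a $ref has sibling keys, the final unwrap(set_default(output, new_value)) gives those siblings precedence and fills the rest from the referenced document. A hedged sketch with hypothetical values:

from mo_dots import set_default, unwrap

referenced = {"host": "localhost", "port": 9200}   # what the $ref resolved to
siblings = {"port": 9300}                          # keys written next to "$ref"
merged = unwrap(set_default(siblings, referenced))
assert merged == {"host": "localhost", "port": 9300}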
Example #25
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif isinstance(output, text_type):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
Example #26
def start():
    try:
        config = json2value(STDIN.readline().decode('utf8'))
        constants.set(config.constants)
        Log.start(set_default(config.debug, {"logs": [{"type": "raw"}]}))
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
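Log.start(set_default(config.debug, {...})) keeps whatever logging the config defines and supplies a raw sink only when none is configured. A minimal sketch, with a hypothetical config:

from mo_dots import set_default

config_debug = {"trace": True}
log_settings = set_default(config_debug, {"logs": [{"type": "raw"}]})
# → {"trace": True, "logs": [{"type": "raw"}]}; an existing "logs" entry in the
# config would have been left alone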
Example #27
    def _status_update(self, mail, status, more=Null):
        """
        UPDATE MESSAGE ID STATUS FOR OTHER PROCESSES TO KNOW
        :param mail:
        :param status:
        :return:
        """
        mail.status = status
        if mail.sender.track_started:
            self.response_queue.add(value2json(set_default({"request": {"id": mail.request.id}, "status": status}, more)))
Example #28
def es_query_proto(path, selects, wheres, schema):
    """
    RETURN THE NESTED ES QUERY
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :param selects: MAP FROM path TO THE SELECT CLAUSE FOR THAT PATH
    :param wheres: MAP FROM path TO LIST OF WHERE CONDITIONS
    :param schema: SCHEMA USED TO TRANSLATE THE EXPRESSIONS
    :return: es_query
    """
    output = None
    last_where = MATCH_ALL
    for p in reversed(sorted(set(wheres.keys()) | set(selects.keys()))):
        where = wheres.get(p)
        select = selects.get(p)

        if where:
            where = AndOp(where).partial_eval().to_esfilter(schema)
            if output:
                where = es_or([es_and([output, where]), where])
        else:
            if output:
                if last_where is MATCH_ALL:
                    where = es_or([output, MATCH_ALL])
                else:
                    where = output
            else:
                where = MATCH_ALL

        if p == ".":
            output = set_default(
                {"from": 0, "size": 0, "sort": [], "query": where}, select.to_es()
            )
        else:
            output = {
                "nested": {
                    "path": p,
                    "inner_hits": set_default({"size": 100000}, select.to_es())
                    if select
                    else None,
                    "query": where,
                }
            }

        last_where = where
    return output
Example #29
    def __init__(
        self,
        hg=None,        # CONNECT TO hg
        repo=None,      # CONNECTION INFO FOR ES CACHE
        branches=None,  # CONNECTION INFO FOR ES CACHE
        use_cache=False,   # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
        timeout=30 * SECOND,
        kwargs=None
    ):
        if not _hg_branches:
            _late_imports()

        self.es_locker = Lock()
        self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)

        self.settings = kwargs
        self.timeout = Duration(timeout)

        # VERIFY CONNECTIVITY
        with Explanation("Test connect with hg"):
            response = http.head(self.settings.hg.url)

        if branches == None:
            self.branches = _hg_branches.get_branches(kwargs=kwargs)
            self.es = None
            return

        self.last_cache_miss = Date.now()

        set_default(repo, {"schema": revision_schema})
        self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

        def setup_es(please_stop):
            with suppress_exception:
                self.es.add_alias()

            with suppress_exception:
                self.es.set_refresh_interval(seconds=1)

        Thread.run("setup_es", setup_es)
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.timeout = timeout
        Thread.run("hg daemon", self._daemon)
Example #30
    def __init__(
        self,
        host,
        user=None,
        port=None,
        config=None,
        gateway=None,
        forward_agent=None,
        connect_timeout=None,
        connect_kwargs=None,
        inline_ssh_env=None,
        key_filename=None,  # part of connect_kwargs
        kwargs=None,
    ):
        connect_kwargs = set_default(
            {}, connect_kwargs, {"key_filename": File(key_filename).abspath}
        )

        self.stdout = LogStream(host, "stdout")
        self.stderr = LogStream(host, "stderr")
        config = Config(**unwrap(set_default(
            {},
            config,
            {"overrides": {"run": {
                # "hide": True,
                "out_stream": self.stdout,
                "err_stream": self.stderr,
            }}},
        )))

        self.warn = False
        self.conn = _Connection(
            host,
            user,
            port,
            config,
            gateway,
            forward_agent,
            connect_timeout,
            connect_kwargs,
            inline_ssh_env,
        )
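Passing a fresh {} as the first argument keeps set_default() from mutating the caller's connect_kwargs, while still letting earlier arguments take precedence. A sketch of both properties, assuming mo_dots semantics:

from mo_dots import set_default, from_data

caller = {"timeout": 10}
merged = set_default({}, caller, {"timeout": 30, "port": 22})
assert from_data(merged) == {"timeout": 10, "port": 22}  # earlier arguments win
assert caller == {"timeout": 10}                         # caller dict untouched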
Example #31
    def test_set_default(self):
        a = {"x": {"y": 1}}
        b = {"x": {"z": 2}}
        c = {}
        d = set_default(c, a, b)

        self.assertTrue(from_data(d) is c, "expecting first parameter to be returned")
        self.assertEqual(d.x.y, 1, "expecting d to have attributes of a")
        self.assertEqual(d.x.z, 2, "expecting d to have attributes of b")

        self.assertEqual(to_data(a).x.z, None, "a should not have been altered")
Example #32
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script":  expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
Example #33
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script":  expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
Example #34
    def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs):
        Exception.__init__(self)
        self.type = type
        self.template = template
        self.params = set_default(kwargs, params)
        self.cause = cause

        if not trace:
            self.trace = extract_stack(2)
        else:
            self.trace = trace
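self.params = set_default(kwargs, params) gives explicitly passed keyword arguments precedence, with the params dict filling whatever is missing. A short sketch with hypothetical values:

from mo_dots import set_default

params = set_default({"name": "worker-1"}, {"name": "default", "attempt": 2})
# → {"name": "worker-1", "attempt": 2}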
Example #35
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep,
                                               MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        rollover_interval = coalesce(kwargs.rollover.interval,
                                     kwargs.rollover.max, "year")
        rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval,
                                "year")

        schema = set_default(kwargs.schema, {
            "mappings": {
                kwargs.type: {
                    "properties": {
                        "~N~": {
                            "type": "nested"
                        }
                    }
                }
            }
        }, json2value(value2json(SCHEMA), leaves=True))

        self.es = RolloverIndex(
            rollover_field={"get": [{
                "first": "."
            }, {
                "literal": "timestamp"
            }]},
            rollover_interval=rollover_interval,
            rollover_max=rollover_max,
            schema=schema,
            limit_replicas=True,
            typed=True,
            read_only=False,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #36
    def append_query(self, es_query, start):
        #TODO: USE "reverse_nested" QUERY TO PULL THESE

        self.start = start
        for i, v in enumerate(self.fields):
            nest = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": v,
                    "size": self.domain.limit
                }}, es_query)
            }})
            if self.edge.allowNulls:
                nest.aggs._missing = set_default({"missing": {"field": v}}, es_query)  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            es_query = nest

        if self.domain.where:
            filter = simplify_esfilter(self.domain.where)
            es_query = {"aggs": {"_filter": set_default({"filter": filter}, es_query)}}

        return es_query
Example #37
    def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs):
        Exception.__init__(self)
        self.type = type
        self.template = template
        self.params = set_default(kwargs, params)
        self.cause = Except.wrap(cause)

        if not trace:
            self.trace = extract_stack(2)
        else:
            self.trace = trace
Example #38
    def get_schema(self, retry=True):
        if self.settings.explore_metadata:
            indices = self.cluster.get_metadata().indices
            if not self.settings.alias or self.settings.alias==self.settings.index:
                #PARTIALLY DEFINED settings
                candidates = [(name, i) for name, i in indices.items() if self.settings.index in i.aliases]
                # TODO: MERGE THE mappings OF ALL candidates, DO NOT JUST PICK THE LAST ONE

                index = "dummy value"
                schema = wrap({"_routing": {}, "properties": {}})
                for _, ind in jx.sort(candidates, {"value": 0, "sort": -1}):
                    mapping = ind.mappings[self.settings.type]
                    set_default(schema._routing, mapping._routing)
                    schema.properties = _merge_mapping(schema.properties, mapping.properties)
            else:
                #FULLY DEFINED settings
                index = indices[self.settings.index]
                schema = index.mappings[self.settings.type]

            if index == None and retry:
                #TRY AGAIN, JUST IN CASE
                self.cluster.cluster_state = None
                return self.get_schema(retry=False)

            #TODO: REMOVE THIS BUG CORRECTION
            if not schema and self.settings.type == "test_result":
                schema = index.mappings["test_results"]
            # DONE BUG CORRECTION

            if not schema:
                Log.error(
                    "ElasticSearch index ({{index}}) does not have type ({{type}})",
                    index=self.settings.index,
                    type=self.settings.type
                )
            return schema
        else:
            mapping = self.cluster.get(self.path + "/_mapping")
            if not mapping[self.settings.type]:
                Log.error("{{index}} does not have type {{type}}", self.settings)
            return wrap({"mappings": mapping[self.settings.type]})
Example #39
    def get_schema(self, retry=True):
        if self.settings.explore_metadata:
            indices = self.cluster.get_metadata().indices
            if not self.settings.alias or self.settings.alias==self.settings.index:
                #PARTIALLY DEFINED settings
                candidates = [(name, i) for name, i in indices.items() if self.settings.index in i.aliases]
                # TODO: MERGE THE mappings OF ALL candidates, DO NOT JUST PICK THE LAST ONE

                index = "dummy value"
                schema = wrap({"_routing": {}, "properties": {}})
                for _, ind in jx.sort(candidates, {"value": 0, "sort": -1}):
                    mapping = ind.mappings[self.settings.type]
                    set_default(schema._routing, mapping._routing)
                    schema.properties = _merge_mapping(schema.properties, mapping.properties)
            else:
                #FULLY DEFINED settings
                index = indices[self.settings.index]
                schema = index.mappings[self.settings.type]

            if index == None and retry:
                #TRY AGAIN, JUST IN CASE
                self.cluster.cluster_state = None
                return self.get_schema(retry=False)

            #TODO: REMOVE THIS BUG CORRECTION
            if not schema and self.settings.type == "test_result":
                schema = index.mappings["test_results"]
            # DONE BUG CORRECTION

            if not schema:
                Log.error(
                    "ElasticSearch index ({{index}}) does not have type ({{type}})",
                    index=self.settings.index,
                    type=self.settings.type
                )
            return schema
        else:
            mapping = self.cluster.get(self.path + "/_mapping")
            if not mapping[self.settings.type]:
                Log.error("{{index}} does not have type {{type}}", self.settings)
            return wrap({"mappings": mapping[self.settings.type]})
Example #40
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        for i, v in enumerate(self.values):
            es_query = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "script": 'doc['+quote(es_field)+'].values.contains(' + value2json(v) + ') ? 1 : 0'
                }}, es_query)
            }})

        return es_query
Example #41
    def append_query(self, es_query, start):
        self.start = start
        for i, v in enumerate(self.fields):
            nest = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": v,
                    "size": self.domain.limit
                }}, es_query),
                "_missing": set_default(
                    {"filter": {"missing": {"field": v}}},
                    es_query
                )
            }})
            es_query = nest
        return es_query
Example #42
def find_container(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _meta:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, text_type):
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)

        type_ = container.config.default.type
        fact_table_name = split_field(frum)[0]

        settings = set_default(
            {
                "index": fact_table_name,
                "name": frum,
                "exists": True,
            }, container.config.default.settings)
        settings.type = None
        return container.type2container[type_](settings)
    elif isinstance(
            frum,
            Mapping) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from jx_base.query import QueryOp
        return QueryOp.wrap(frum, namespace=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
Example #43
    def append_query(self, es_query, start):
        self.start = start
        domain = self.domain
        field = self.edge.value

        if isinstance(field, Variable):
            key = domain.key
            if isinstance(key, (tuple, list)) and len(key)==1:
                key = key[0]
            include = [p[key] for p in domain.partitions]
            if self.edge.allowNulls:

                return wrap({"aggs": {
                    "_match": set_default({"terms": {
                        "field": field.var,
                        "size": self.limit,
                        "include": include
                    }}, es_query),
                    "_missing": set_default(
                        {"filter": {"or": [
                            field.missing().to_esfilter(),
                            {"not": {"terms": {field.var: include}}}
                        ]}},
                        es_query
                    ),
                }})
            else:
                return wrap({"aggs": {
                    "_match": set_default({"terms": {
                        "field": field.var,
                        "size": self.limit,
                        "include": include
                    }}, es_query)
                }})
        else:
            include = [p[domain.key] for p in domain.partitions]
            if self.edge.allowNulls:

                return wrap({"aggs": {
                    "_match": set_default({"terms": {
                        "script_field": field.to_ruby(),
                        "size": self.limit,
                        "include": include
                    }}, es_query),
                    "_missing": set_default(
                        {"filter": {"or": [
                            field.missing().to_esfilter(),
                            NotOp("not", InOp("in", [field, Literal("literal", include)])).to_esfilter()
                        ]}},
                        es_query
                    ),
                }})
            else:
                return wrap({"aggs": {
                    "_match": set_default({"terms": {
                        "script_field": field.to_ruby(),
                        "size": self.limit,
                        "include": include
                    }}, es_query)
                }})
Example #44
    def append_query(self, es_query, start):
        self.start = start

        value = self.edge.value.partial_eval()
        script = value.to_ruby(self.schema)
        exists = NotOp("not", script.miss).partial_eval()
        if not isinstance(self.edge.value, Variable):

            output = wrap({"aggs": {
                "_match": {
                    "filter": exists.to_esfilter(self.schema),
                    "aggs": {
                        "_filter": set_default(
                            {"terms": {
                                "script": script.expr,
                                "size": self.domain.limit,
                                "order": {"_term": self.sorted} if self.sorted else None
                            }},
                            es_query
                        )
                    }
                },
                "_missing": set_default(
                    {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output
        elif self.edge.value.var in [s.value.var for s in self.query.sort]:
            sort_dir = [s.sort for s in self.query.sort if s.value.var == self.edge.value.var][0]
            output = wrap({"aggs": {
                "_match": set_default(
                    {"terms": {
                        "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                        "size": self.domain.limit,
                        "order": {"_term": "asc" if sort_dir == 1 else "desc"}
                    }},
                    es_query
                ),
                "_missing": set_default(
                    {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output
        else:
            output = wrap({"aggs": {
                "_match": set_default(
                    {"terms": {
                        "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                        "size": self.domain.limit
                    }},
                    es_query
                ),
                "_missing": set_default(
                    {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output
Example #45
def _replace_locals(node, doc_path):
    if is_data(node):
        # RECURSE, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif k == "$concat":
                if not is_sequence(v):
                    Log.error("$concat expects an array of strings")
                return coalesce(node.get("separator"), "").join(v)
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        Log.error(
                            "{{frag|quote}} reaches up past the root document",
                            frag=frag)
                    new_value = get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            new_value = get_attr(doc_path[-1], frag)

        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            return unwrap(set_default(output, new_value))

    elif is_list(node):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node
Example #46
    def append_query(self, es_query, start):
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start
        for i, v in enumerate(self.fields):
            exists = v.exists().partial_eval()
            nest = wrap({
                "aggs": {
                    "_match": {
                        "filter": exists.to_es14_filter(self.schema),
                        "aggs": {
                            "_filter":
                            set_default(
                                {
                                    "terms": {
                                        "field": first(
                                            self.schema.leaves(
                                                v.var)).es_column,
                                        "size": self.domain.limit
                                    }
                                }, es_query)
                        }
                    }
                }
            })
            nest.aggs._missing = set_default(
                {"filter": NotOp("not", exists).to_es14_filter(self.schema)},
                es_query)
            es_query = nest

        if self.domain.where:
            filter_ = self.domain.where.partial_eval().to_es14_filter(
                self.schema)
            es_query = {
                "aggs": {
                    "_filter": set_default({"filter": filter_}, es_query)
                }
            }

        return es_query
Example #47
def _replace_locals(node, doc_path):
    if isinstance(node, Mapping):
        # RECURSE, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        Log.error(
                            "{{frag|quote}} reaches up past the root document",
                            frag=frag)
                    new_value = mo_dots.get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            new_value = mo_dots.get_attr(doc_path[-1], frag)

        if new_value in doc_path:
            Log.error("encountered referential loop {{path|json}}",
                      path=[new_value] + doc_path)
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            return unwrap(set_default(output, new_value))

    elif isinstance(node, list):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node
Example #48
def find_container(frum):
    """
    :param frum:
    :return:
    """
    global namespace
    if not namespace:
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)

        type_ = container.config.default.type
        fact_table_name = path[0]

        settings = set_default(
            {
                "index": fact_table_name,
                "name": frum,
                "exists": True,
            }, container.config.default.settings)
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp
        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
Example #49
    def append_query(self, es_query, start):
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start
        for i, v in enumerate(self.fields):
            exists = v.exists().partial_eval()
            nest = wrap({"aggs": {"_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {"_filter": set_default({"terms": {
                    "field": self.schema.leaves(v.var)[0].es_column,
                    "size": self.domain.limit
                }}, es_query)}
            }}})
            nest.aggs._missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
            es_query = nest

        if self.domain.where:
            filter_ = self.domain.where.partial_eval().to_esfilter(self.schema)
            es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}

        return es_query
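For a single field "f" with limit 10, one pass of the loop wraps the incoming es_query in roughly this aggregation (a hand-written illustration of the shape, not captured output; the real filters come from exists.to_esfilter(self.schema)):

one_pass = {"aggs": {
    "_match": {
        "filter": {"exists": {"field": "f"}},
        "aggs": {"_filter": {"terms": {"field": "f", "size": 10}}},  # merged with es_query
    },
    "_missing": {"filter": {"not": {"exists": {"field": "f"}}}},     # merged with es_query
}}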
Ejemplo n.º 50
0
def wrap_from(frum, schema=None):
    """
    :param frum: name of a table, or a container/query description
    :param schema: optional schema for the resulting container
    :return: a Container that can be queried
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = split_field(frum)[0]

        settings = set_default(
            {
                "index": index,
                "name": frum,
                "exists": True,
            },
            _containers.config.default.settings
        )
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
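The dispatch order matters: a typed dict beats a query dict, which beats a plain list. A pure-Python sketch of that routing (illustrative only, not the library code):

def wrap_from_sketch(frum):
    if isinstance(frum, str):
        return "container-from-default-config"
    elif isinstance(frum, dict) and frum.get("type"):
        return "typed-container"
    elif isinstance(frum, dict) and frum.get("from"):
        return "query-op"
    elif isinstance(frum, (list, set)):
        return "list-container"
    return "as-is"

assert wrap_from_sketch({"type": "elasticsearch"}) == "typed-container"
assert wrap_from_sketch({"from": "unittest"}) == "query-op"
assert wrap_from_sketch([{"a": 1}]) == "list-container"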
Ejemplo n.º 51
0
def wrap_from(frum, schema=None):
    """
    :param frum: name of a table, or a container/query description
    :param schema: optional schema for the resulting container
    :return: a Container that can be queried
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = join_field(split_field(frum)[:1])

        settings = set_default(
            {
                "index": index,
                "name": frum,
                "exists": True,
            },
            _containers.config.default.settings
        )
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
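Examples 50 and 51 compute the index name differently, but both reduce to the first path segment; split_field and join_field are inverses (assuming mo_dots):

from mo_dots import join_field, split_field

frum = "task.action.timings"
assert split_field(frum) == ["task", "action", "timings"]
assert split_field(frum)[0] == "task"               # Example 50
assert join_field(split_field(frum)[:1]) == "task"  # Example 51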
Ejemplo n.º 52
0
            def async(args, kwargs=None, *_args, **_kwargs):  # Python 2-era name; `async` became reserved in Python 3.7
                kwargs = set_default(kwargs, dict(zip(arg_names, args)))

                with self.id_lock:
                    id = self.next_id
                    self.next_id += 1

                mail = deepcopy(Data(
                    status=states.PENDING,
                    caller={
                        # "stack": extract_stack(1)
                    },
                    sender=set_default(_kwargs, opts),
                    message=kwargs,
                    request=set_default({"id": id})
                ))

                output = AsyncResult(id, mail=mail, app=self)
                with self.responses_lock:
                    self.responses[id] = output
                self.request_queue.add(value2json(mail))
                Log.note("Added {{id}} ({{name}}) to request queue\n{{request}}", id=id, name=opts.name, request=mail)
                return output
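The pattern is request/response correlation: each call takes a unique id under a lock, registers a pending result under that id, then enqueues the message; a daemon thread later resolves the entry when a reply carrying the same id arrives. A stdlib-only sketch of that bookkeeping (names hypothetical):

import threading

class PendingCalls:
    def __init__(self):
        self.lock = threading.Lock()
        self.next_id = 0
        self.pending = {}                 # id -> reply slot

    def register(self):
        with self.lock:
            id_ = self.next_id
            self.next_id += 1
            self.pending[id_] = {"done": False, "value": None}
        return id_

    def resolve(self, id_, value):
        # called from the reader/daemon thread
        slot = self.pending.pop(id_)
        slot["done"], slot["value"] = True, value

calls = PendingCalls()
rid = calls.register()
calls.resolve(rid, "ok")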
Ejemplo n.º 53
0
    def note(cls,
             template,
             default_params={},
             stack_depth=0,
             log_context=None,
             **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
            Log.error("Log.note was expecting a unicode template")

        if len(template) > 10000:
            template = template[:10000]

        params = dict(unwrap(default_params), **more_params)

        log_params = set_default(
            {
                "template": template,
                "params": params,
                "timestamp": datetime.utcnow(),
                "machine": machine_metadata
            }, log_context, {"context": exceptions.NOTE})

        if not template.startswith("\n") and template.find("\n") > -1:
            template = "\n" + template

        if cls.trace:
            log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace(
                "{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
                "method": text_type(f.f_code.co_name)
            }
            thread = _Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace(
                "{{", "{{params.")

        cls.main_log.write(log_template, log_params)
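Typical usage passes template parameters as keyword arguments, which land under "params" when the template is expanded (assuming mo_logs):

from mo_logs import Log

Log.note("processed {{num}} records from {{source}}", num=42, source="branches")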
Ejemplo n.º 54
0
    def _convert_edge(self, edge):
        dim = self.dimensions[edge.value]
        if not dim:
            return edge

        if len(listwrap(dim.fields)) == 1:
            #TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
            new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
            return new_edge

        edge = copy(edge)
        edge.value = None
        edge.domain = dim.getDomain()
        return edge
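The single-field branch leans on listwrap/unwraplist to normalize between scalars and lists (assuming mo_dots):

from mo_dots import listwrap, unwraplist

assert list(listwrap(None)) == []      # missing -> empty list
assert list(listwrap("f")) == ["f"]    # scalar -> one-element list
assert unwraplist(["f"]) == "f"        # one-element list -> scalar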
Ejemplo n.º 55
0
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({
            "from": "branches",
            "format": "list",
            "limit": 10000
        }).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({
                "id": b.name + " " + b.locale,
                "value": b
            } for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"],
                               data=found_branches,
                               fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(hg, branches, kwargs)
        Log.error("problem getting branches", cause=e)
Ejemplo n.º 56
0
    def __init__(self, name, config):
        config = wrap(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
        self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

        self.lock = Lock("wait for response from "+name)
        self.current_task = None
        self.current_response = None
        self.current_error = None

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)
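The constructor bootstraps the worker by writing a single JSON line of configuration to the child's stdin; set_default guarantees debug.trace is on even when the caller's config omits it. A stdlib-only sketch of the same handshake (worker path hypothetical):

import json
import subprocess
import sys

config = {"debug": {"trace": True}}        # defaults merged over caller config
proc = subprocess.Popen(
    [sys.executable, "python_worker.py"],  # stand-in for mo_threads/python_worker.py
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
)
proc.stdin.write((json.dumps(config) + "\n").encode("utf8"))
proc.stdin.flush()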
Ejemplo n.º 57
0
    def append_query(self, es_query, start):
        self.start = start

        parts = self.edge.domain.partitions
        filters = []
        notty = []

        for p in parts:
            filters.append(AndOp("and", [p.where] + notty).to_esfilter())
            notty.append(NotOp("not", p.where))

        missing_filter = None
        if self.edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_filter = set_default(
                {"filter": AndOp("and", notty).to_esfilter()}, es_query)

        return wrap({
            "aggs": {
                "_match": set_default({"filters": {
                    "filters": filters
                }}, es_query),
                "_missing": missing_filter
            }
        })
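The effect of accumulating notty is that the buckets are disjoint: each partition's filter is ANDed with the negation of every earlier partition, and _missing catches rows matching none. An illustration for two hypothetical partition filters (placeholder dicts; the real filters come from to_esfilter()):

P0 = {"term": {"status": "open"}}    # hypothetical partition filter
P1 = {"term": {"status": "closed"}}  # hypothetical partition filter

shape = {"aggs": {
    "_match": {"filters": {"filters": [
        {"and": [P0]},               # bucket 0: P0
        {"and": [P1, {"not": P0}]},  # bucket 1: P1 AND NOT P0
    ]}},
    "_missing": {"filter": {"and": [{"not": P0}, {"not": P1}]}},  # rows in no partition
}}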