def __init__(
        self,
        host,
        user,
        password,
        table,
        meta,       # REDSHIFT COPY COMMAND REQUIRES A BUCKET TO HOLD PARAMETERS
        database=None,
        port=5439,
        settings=None
    ):
        """
        OPEN A Redshift CONNECTION AND TRY TO CREATE THE DESTINATION TABLE

        ONLY settings IS READ IN THIS BODY; THE OTHER PARAMETERS ARE PRESUMABLY
        FOLDED INTO settings BY A DECORATOR NOT VISIBLE HERE - TODO CONFIRM
        """
        self.settings = settings
        self.db = Redshift(settings)

        table_name = settings.table
        INDEX_CACHE[table_name] = wrap({"name": table_name})  # HACK TO GET parse_columns TO WORK
        all_columns = parse_columns(table_name, settings.mapping.test_result.properties)
        nested_names = [col.name for col in all_columns if col.type == "nested"]

        def is_storable(col):
            # OBJECT COLUMNS, AND ANYTHING BELOW A NESTED COLUMN, GET NO TABLE COLUMN
            if col.type in ["object"]:
                return False
            for parent in nested_names:
                if col.name.startswith(parent + "."):
                    return False
            return True

        self.columns = wrap([col for col in all_columns if is_storable(col)])

        create_table = """
                CREATE TABLE {{table_name}} (
                    "_id" character varying UNIQUE,
                    {{columns}}
                )"""

        try:
            # PARAMETERS ARE BUILT INSIDE THE try SO QUOTING ERRORS ARE REPORTED THE SAME WAY
            params = {
                "table_name": self.db.quote_column(table_name),
                "columns": SQL(",\n".join(
                    self.db.quote_column(col.name) + " " + self.db.es_type2pg_type(col.type)
                    for col in self.columns
                ))
            }
            self.db.execute(create_table, params, retry=False)
        except Exception as e:
            if "already exists" not in e:
                Log.error("Could not make table", e)
            else:
                # AN EXISTING TABLE IS NOT FATAL, ONLY WORTH A NOTICE
                Log.alert("Table {{table}} exists in Redshift", table=table_name)
Exemple #2
0
 def convert(self, expr):
     """
     ADD THE ".$value" SUFFIX TO ALL VARIABLES

     CONSTANTS, NUMBERS, "." AND Date VALUES ARE RETURNED AS-IS; QUERIES GO
     THROUGH _convert_query; STRUCTURES AND LISTS ARE CONVERTED RECURSIVELY.
     ANY OTHER TYPE FALLS THROUGH AND RETURNS None
     """
     if expr is True or expr == None or expr is False:
         return expr
     if Math.is_number(expr):
         return expr
     if expr == ".":
         return "."
     if is_keyword(expr):
         #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
         return expr + ".$value"
     if isinstance(expr, basestring):
         # A STRING THAT IS NOT A KEYWORD CAN NOT BE A VARIABLE
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None
     if isinstance(expr, Date):
         return expr
     if isinstance(expr, Query):
         return self._convert_query(expr)
     if isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             converted = {name: self.convert(value) for name, value in expr.items()}
             return wrap(converted)
         # ASSUME SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = self.converter_map.get(k, self._convert_bop)
         return handler(k, v)
     if isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     return None
Exemple #3
0
    def __init__(self, value):
        """
        SPLIT value INTO scheme, host, port, path, query AND fragment

        :param value: THE URL STRING; None LEAVES EVERY PART AT ITS DEFAULT
        """
        self.scheme = None
        self.host = None
        self.port = None
        self.path = ""
        self.query = ""
        self.fragment = ""

        if value == None:
            return

        if not _convert:
            _late_import()
        if value.startswith("file://") or value.startswith("//"):
            # urlparse DOES NOT WORK IN THESE CASES
            # SPLIT ON THE FIRST "//" ONLY: THE REMAINDER MAY CONTAIN MORE OF THEM
            # (eg file:///tmp//x), WHICH USED TO BREAK THE TWO-NAME UNPACKING
            scheme, suffix = value.split("//", 1)
            self.scheme = scheme.rstrip(":")
            # parse() IS EXPECTED TO FILL THE REMAINING PARTS ON self,
            # LEAVING self.query AS RAW TEXT - TODO CONFIRM
            parse(self, suffix, 0, 1)

            self.query = wrap(_convert.url_param2value(self.query))
        else:
            output = urlparse(value)
            self.scheme = output.scheme
            self.port = output.port
            self.host = output.netloc.split(":")[0]  # netloc MAY CARRY ":port"
            self.path = output.path
            self.query = wrap(_convert.url_param2value(output.query))
            self.fragment = output.fragment
Exemple #4
0
    def append_query(self, es_query, start):
        """
        RETURN THE AGGREGATION CLAUSES FOR AN EDGE OVER AN EXPLICIT SET OF PARTITIONS

        "_match" IS A terms AGGREGATION RESTRICTED TO THE PARTITION KEYS.
        "_missing" IS ADDED ONLY WHEN THE EDGE ALLOWS NULLS, AND COVERS VALUES
        THAT ARE MISSING OR NOT ONE OF THE PARTITION KEYS

        :param es_query: DEFAULTS MERGED INTO EACH CLAUSE WITH set_default
        :param start: RECORDED ON self.start
        """
        self.start = start
        domain = self.domain
        field = self.edge.value
        by_variable = isinstance(field, Variable)

        if by_variable:
            key = domain.key
            if isinstance(key, (tuple, list)) and len(key) == 1:
                key = key[0]  # SINGLE-ELEMENT COMPOUND KEY IS TREATED AS A SIMPLE KEY
            include = [part[key] for part in domain.partitions]
        else:
            include = [part[domain.key] for part in domain.partitions]

        # A VARIABLE IS AGGREGATED BY FIELD NAME, ANY OTHER EXPRESSION BY SCRIPT
        if by_variable:
            terms = {"field": field.var, "size": self.limit, "include": include}
        else:
            terms = {"script_field": field.to_ruby(), "size": self.limit, "include": include}

        aggs = {"_match": set_default({"terms": terms}, es_query)}

        if self.edge.allowNulls:
            is_missing = field.missing().to_esfilter()
            if by_variable:
                not_included = {"not": {"terms": {field.var: include}}}
            else:
                not_included = NotOp("not", InOp("in", [field, Literal("literal", include)])).to_esfilter()
            aggs["_missing"] = set_default(
                {"filter": {"or": [is_missing, not_included]}},
                es_query
            )

        return wrap({"aggs": aggs})
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
        RETURN (keys, values) PAIRS, ONE PER GROUP, WHERE
            keys IS THE COMBINATION OF KEY VALUES THAT IDENTIFIES THE GROUP
            values IS LIST OF ALL data THAT HAS THOSE KEYS
        size, min_size, max_size - WHEN ANY IS GIVEN, GROUP BY COUNT INSTEAD (size WINS OVER max_size)
        contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """

    by_size = size != None or min_size != None or max_size != None
    if by_size:
        upper = size if size != None else max_size
        return groupby_min_max_size(data, min_size=min_size, max_size=upper)

    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        # itertools.groupby ONLY MERGES NEIGHBOURS, SO SORT UNLESS THE CALLER WANTS RUNS
        ordered = data if contiguous else sorted(data, key=get_key)
        runs = itertools.groupby(ordered, get_key)

        return (
            (wrap({k: v for k, v in zip(keys, group_key)}), wrap(members))
            for group_key, members in runs
        )
    except Exception as e:
        Log.error("Problem grouping", e)
Exemple #6
0
def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE

    THE RETURNED FILTER DICTS ARE THE SAME OBJECTS EMBEDDED IN THE TEMPLATE, SO
    FILLING THEM IN FILLS IN THE TEMPLATE

    :param path: FIELD PATH; EVERYTHING AFTER ITS FIRST STEP IS THE NESTED PATH
    :return: (template, list of empty filter dicts)
    """
    nested_path = split_field(path)[1:]
    root_filter = {}

    if not nested_path:
        template = wrap({"query": {"filtered": {"filter": root_filter}}, "from": 0, "size": 0, "sort": []})
        return template, wrap([root_filter])

    nested_filter = {}
    nested_clause = {
        "nested": {
            "path": join_field(nested_path),
            "filter": nested_filter,
            "inner_hits": {"size": 100000},
        }
    }
    template = wrap({
        "filter": {"and": [root_filter, nested_clause]},
        "from": 0,
        "size": 0,
        "sort": [],
    })
    return template, wrap([root_filter, nested_filter])
Exemple #7
0
    def append_query(self, es_query, start):
        """
        RETURN THE AGGREGATION CLAUSES FOR THIS EDGE

        "_match" IS A terms AGGREGATION OVER THE EDGE VALUE, "_missing" COVERS THE
        DOCUMENTS WITH NO VALUE. A Variable IS ADDRESSED BY FIELD NAME, ANY OTHER
        EXPRESSION BY SCRIPT

        :param es_query: DEFAULTS MERGED INTO EACH CLAUSE WITH set_default
        :param start: RECORDED ON self.start
        :return: wrapped {"aggs": {...}}
        """
        self.start = start
        value = self.edge.value

        if not isinstance(value, Variable):
            script_field = value.to_ruby()
            missing = value.missing().to_esfilter()

            return wrap({
                "aggs": {
                    "_match": set_default(
                        {"terms": {"script_field": script_field, "size": self.domain.limit}}, es_query
                    ),
                    "_missing": set_default({"filter": missing}, es_query),
                }
            })

        return wrap({
            "aggs": {
                "_match": set_default(
                    {"terms": {"field": value.var, "size": self.domain.limit}}, es_query
                ),
                # THE missing CLAUSE NAMES THE FIELD, SAME AS THE terms CLAUSE ABOVE
                # (IT USED TO BE HANDED THE Variable OBJECT ITSELF)
                "_missing": set_default({"missing": {"field": value.var}}, es_query),
            }
        })
    def map(self, map_):
        """
        RETURN A NEW QueryOp WITH map_ APPLIED TO EVERY EXPRESSION OF THIS QUERY

        limit AND format ARE CARRIED OVER UNCHANGED
        """
        def map_select(s, map_):
            # THE MAPPED value WINS, EVERYTHING ELSE DEFAULTS TO THE ORIGINAL CLAUSE
            remapped = {"value": s.value.map(map_)}
            return set_default(remapped, s)

        def map_edge(e, map_):
            remapped_parts = [set_default({"where": p.where.map(map_)}, p) for p in e.domain.partitions]
            partitions = unwraplist(remapped_parts)

            new_domain = copy(e.domain)
            new_domain.where = e.domain.where.map(map_)
            new_domain.partitions = partitions

            new_edge = copy(e)
            new_edge.value = e.value.map(map_)
            new_edge.domain = new_domain
            if e.range:
                new_edge.range.min = e.range.min.map(map_)
                new_edge.range.max = e.range.max.map(map_)
            return new_edge

        # EVALUATED IN THE SAME ORDER AS THE CONSTRUCTOR ARGUMENTS BELOW
        frum = self.frum.map(map_)
        select = wrap([map_select(s, map_) for s in listwrap(self.select)])
        edges = wrap([map_edge(e, map_) for e in self.edges])
        groupby = wrap([g.map(map_) for g in self.groupby])
        window = wrap([w.map(map_) for w in self.window])
        where = self.where.map(map_)
        sort = wrap([map_select(s, map_) for s in listwrap(self.sort)])

        return QueryOp(
            "from",
            frum=frum,
            select=select,
            edges=edges,
            groupby=groupby,
            window=window,
            where=where,
            sort=sort,
            limit=self.limit,
            format=self.format,
        )
Exemple #9
0
    def _aggs_iterator(agg, d):
        """
        WALK A NESTED AGGREGATION RESULT, YIELDING THE INNERMOST BUCKETS

        AT EACH LEVEL THE "_match" BUCKETS, THEN THE "_other" BUCKETS, THEN THE
        "_missing" BUCKET ARE VISITED. WHILE A "_match" BUCKET IS BEING VISITED IT IS
        RECORDED IN parts[d] (parts BELONGS TO THE ENCLOSING SCOPE); FOR "_other" AND
        "_missing" parts[d] IS Null

        :param agg: AGGREGATION RESULT AT THIS LEVEL (PASSED THROUGH drill() FIRST)
        :param d: NUMBER OF LEVELS REMAINING BELOW THIS ONE; 0 MEANS YIELD BUCKETS
        """
        agg = drill(agg)

        if d > 0:
            # NOT AT THE BOTTOM YET: RECURSE INTO EVERY BUCKET
            for b in agg.get("_match", EMPTY).get("buckets", EMPTY_LIST):
                parts[d] = wrap(b)
                for a in _aggs_iterator(b, d - 1):
                    yield a
            parts[d] = Null
            for b in agg.get("_other", EMPTY).get("buckets", EMPTY_LIST):
                for a in _aggs_iterator(b, d - 1):
                    yield a
            b = drill(agg.get("_missing", EMPTY))
            # ONLY DESCEND INTO "_missing" WHEN IT HAS A TRUTHY doc_count
            if b.get("doc_count"):
                for a in _aggs_iterator(b, d - 1):
                    yield a
        else:
            # BOTTOM LEVEL: YIELD ONLY BUCKETS WITH A TRUTHY doc_count
            for b in agg.get("_match", EMPTY).get("buckets", EMPTY_LIST):
                parts[d] = wrap(b)
                b = drill(b)
                if b.get("doc_count"):
                    yield b
            parts[d] = Null
            for b in agg.get("_other", EMPTY).get("buckets", EMPTY_LIST):
                b = drill(b)
                if b.get("doc_count"):
                    yield b
            b = drill(agg.get("_missing", EMPTY))
            if b.get("doc_count"):
                yield b
Exemple #10
0
    def append_query(self, es_query, start):
        """
        RETURN THE AGGREGATION CLAUSES FOR THIS EDGE

        "_match" IS A terms AGGREGATION OVER THE EDGE VALUE, "_missing" COVERS THE
        DOCUMENTS WITH NO VALUE. FOR A NON-Variable EXPRESSION "_missing" IS None
        WHEN THE EXPRESSION'S missing() IS FALSY

        :param es_query: DEFAULTS MERGED INTO EACH CLAUSE WITH set_default
        :param start: RECORDED ON self.start
        :return: wrapped {"aggs": {...}}
        """
        self.start = start
        value = self.edge.value

        if not isinstance(value, Variable):
            script_field = value.to_ruby()
            missing = value.missing()

            return wrap({
                "aggs": {
                    "_match": set_default(
                        {"terms": {"script_field": script_field, "size": self.domain.limit}}, es_query
                    ),
                    "_missing": set_default({"filter": missing.to_esfilter()}, es_query) if missing else None,
                }
            })

        return wrap({
            "aggs": {
                "_match": set_default(
                    {"terms": {"field": value.var, "size": self.domain.limit}}, es_query
                ),
                # THE missing CLAUSE NAMES THE FIELD, SAME AS THE terms CLAUSE ABOVE
                # (IT USED TO BE HANDED THE Variable OBJECT ITSELF)
                "_missing": set_default(
                    {"missing": {"field": value.var}}, es_query
                ),  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            }
        })
Exemple #11
0
    def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
        """
        SET UP THE METADATA CONTAINERS AND START THE "refresh metadata" THREAD

        DOES NOTHING WHEN self ALREADY HAS A settings ATTRIBUTE (ALREADY INITIALIZED)
        """
        global _elasticsearch
        if hasattr(self, "settings"):
            return

        # IMPORTED HERE, NOT AT MODULE LEVEL - PRESUMABLY TO AVOID AN IMPORT CYCLE
        from pyLibrary.queries.containers.lists import ListContainer
        from pyLibrary.env import elasticsearch as _elasticsearch

        self.settings = settings
        self.default_name = coalesce(name, alias, index)
        self.default_es = _elasticsearch.Cluster(settings=settings)
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.meta = Dict()
        table_columns = metadata_tables()
        column_columns = metadata_columns()

        tables_schema = wrap({c.name: c for c in table_columns})
        self.meta.tables = ListContainer("meta.tables", [], tables_schema)
        columns_schema = wrap({c.name: c for c in column_columns})
        self.meta.columns = ListContainer("meta.columns", [], columns_schema)

        # THE COLUMN CONTAINER DESCRIBES BOTH METADATA TABLES
        for described in (column_columns, table_columns):
            self.meta.columns.insert(described)

        # TODO: fix monitor so it does not bring down ES
        target = self.monitor if ENABLE_META_SCAN else self.not_monitor
        self.worker = Thread.run("refresh metadata", target)
Exemple #12
0
def get(url):
    """
    USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON

    :param url: MUST DECLARE A PROTOCOL; A file:// URL MAY BE RELATIVE (IT IS
                RESOLVED AGAINST THE CURRENT WORKING DIRECTORY), ANY OTHER URL
                MUST BE ABSOLUTE
    :return: THE WRAPPED DOCUMENT, AFTER REFERENCE AND LOCAL REPLACEMENT
    """
    if not _Log:
        _late_import()

    protocol_end = url.find("://")
    if protocol_end == -1:
        _Log.error("{{url}} must have a prototcol (eg http://) declared", url=url)

    base = URL("")
    if url.startswith("file://") and url[7] != "/":
        # RELATIVE FILE REFERENCE: USE THE WORKING DIRECTORY AS BASE
        if os.sep=="\\":
            base = URL("file:///" + os.getcwd().replace(os.sep, "/").rstrip("/") + "/.")
        else:
            base = URL("file://" + os.getcwd().rstrip("/") + "/.")
    elif url[protocol_end + 3] != "/":
        _Log.error("{{url}} must be absolute", url=url)

    phase1 = _replace_ref(wrap({"$ref": url}), base)  # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    try:
        phase2 = _replace_locals(phase1, [phase1])
        return wrap(phase2)
    except Exception as e:
        # PASS THE ORIGINAL FAILURE ALONG, OTHERWISE THE REAL REASON IS LOST
        _Log.error("problem replacing locals in\n{{phase1}}", phase1=phase1, cause=e)
Exemple #13
0
    def __getitem__(self, key):
        """
        LOOKUP key, WHERE A DOTTED key IS A PATH INTO NESTED STRUCTURES

        None GIVES Null, "." GIVES self (OR THE RAW VALUE WHEN IT IS NOT A Mapping),
        AND A MISSING KEY GIVES A NullType BOUND TO (container, key)
        """
        if key == None:
            return Null
        if key == ".":
            output = _get(self, "_dict")
            return self if isinstance(output, Mapping) else output

        if isinstance(key, str):
            key = key.decode("utf8")
        elif not isinstance(key, unicode):
            from pyLibrary.debugs.logs import Log
            Log.error("only string keys are supported")

        d = _get(self, "_dict")

        if key.find(".") < 0:
            # SIMPLE KEY
            o = d.get(key)
            if o == None:
                return NullType(d, key)
            return wrap(o)

        # DOTTED PATH: DESCEND ONE STEP AT A TIME
        for n in _split_field(key):
            if isinstance(d, NullType):
                d = NullType(d, n)  # OH DEAR, Null TREATS n AS PATH, NOT LITERAL
            else:
                d = _getdefault(d, n)  # EVERYTHING ELSE TREATS n AS LITERAL
        return wrap(d)
    def __init__(self, value):
        """
        SPLIT value INTO scheme, host, port, path, query AND fragment

        :param value: THE URL STRING; None LEAVES EVERY PART AT ITS DEFAULT
        :raises: ANY PARSING PROBLEM IS REPORTED THROUGH _Log.error, WITH THE
                 ORIGINAL EXCEPTION AS cause
        """
        if not _convert:
            _late_import()

        try:
            self.scheme = None
            self.host = None
            self.port = None
            self.path = ""
            self.query = ""
            self.fragment = ""

            if value == None:
                return

            if value.startswith("file://") or value.startswith("//"):
                # urlparse DOES NOT WORK IN THESE CASES
                # SPLIT ON THE FIRST "//" ONLY: THE REMAINDER MAY CONTAIN MORE OF THEM
                # (eg file:///tmp//x), WHICH USED TO BREAK THE TWO-NAME UNPACKING
                scheme, suffix = value.split("//", 1)
                self.scheme = scheme.rstrip(":")
                # parse() IS EXPECTED TO FILL THE REMAINING PARTS ON self,
                # LEAVING self.query AS RAW TEXT - TODO CONFIRM
                parse(self, suffix, 0, 1)
                self.query = wrap(_convert.url_param2value(self.query))
            else:
                output = urlparse(value)
                self.scheme = output.scheme
                self.port = output.port
                self.host = output.netloc.split(":")[0]  # netloc MAY CARRY ":port"
                self.path = output.path
                self.query = wrap(_convert.url_param2value(output.query))
                self.fragment = output.fragment
        except Exception as e:
            _Log.error("problem parsing {{value}} to URL", value=value, cause=e)
Exemple #15
0
    def metas(self, prefix=None, limit=None, delimiter=None):
        """
        RETURN THE METADATA DESCRIPTORS FOR EACH KEY

        :param prefix: PASSED TO self.bucket.list
        :param limit: MAXIMUM NUMBER OF DESCRIPTORS TO RETURN; FALSY MEANS NO LIMIT
        :param delimiter: PASSED TO self.bucket.list
        :return: WRAPPED LIST OF {"key", "etag", "expiry_date", "last_modified"}
        """

        keys = self.bucket.list(prefix=prefix, delimiter=delimiter)

        output = []
        for i, k in enumerate(keys):
            # CHECK BEFORE APPENDING, SO NO MORE THAN limit DESCRIPTORS ARE RETURNED
            # (THE CHECK USED TO COME AFTER THE APPEND, GIVING limit + 1)
            if limit and i >= limit:
                break
            output.append({
                "key": strip_extension(k.key),
                "etag": convert.quote2string(k.etag),
                "expiry_date": Date(k.expiry_date),
                "last_modified": Date(k.last_modified)
            })
        return wrap(output)
Exemple #16
0
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value

     A KEYWORD IS REPLACED BY ITS ENTRY IN self.dimensions (WHEN THERE IS ONE);
     QUERIES GO THROUGH _convert_query; STRUCTURES AND LISTS ARE CONVERTED
     RECURSIVELY; ANYTHING UNRECOGNIZED IS RETURNED UNCHANGED
     """
     if expr is True or expr == None or expr is False:
         return expr
     if Math.is_number(expr):
         return expr
     if expr == ".":
         return "."
     if is_keyword(expr):
         return coalesce(self.dimensions[expr], expr)
     if isinstance(expr, basestring):
         # A STRING THAT IS NOT A KEYWORD CAN NOT BE A VARIABLE
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None
     if isinstance(expr, Date):
         return expr
     if isinstance(expr, Query):
         return self._convert_query(expr)
     if isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             converted = {name: self.convert(value) for name, value in expr.leaves()}
             return wrap(converted)
         # ASSUME SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = converter_map.get(k, self._convert_bop)
         return handler(self, k, v)
     if isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     return expr
Exemple #17
0
def _select_a_field(field):
    """
    NORMALIZE A select CLAUSE TO {"name", "value"}

    :param field: A FIELD NAME, OR A STRUCTURE WITH name AND value
    :return: WRAPPED {"name", "value"}; A STRING value IS SPLIT INTO ITS PATH STEPS
    """
    if isinstance(field, basestring):
        return wrap({"name": field, "value": split_field(field)})

    # WRAP ONCE, SO BOTH CASES BELOW USE ATTRIBUTE ACCESS ON THE WRAPPED FORM
    # (THE LAST CASE USED TO READ .name AND .value FROM THE UNWRAPPED field)
    field = wrap(field)
    if isinstance(field.value, basestring):
        return wrap({"name": field.name, "value": split_field(field.value)})
    return wrap({"name": field.name, "value": field.value})
 def search(self, query):
     """
     FILTER self.data WITH query.query.filtered.filter AND RETURN AN ES-SHAPED RESULT

     WHEN query.fields IS GIVEN, EACH HIT CARRIES THE SELECTED "fields"
     INSTEAD OF THE WHOLE "_source"
     """
     query = wrap(query)
     accept = jx.get(query.query.filtered.filter)
     matches = wrap([
         {"_id": doc_id, "_source": doc}
         for doc_id, doc in self.data.items()
         if accept(doc)
     ])

     if not query.fields:
         return wrap({"hits": {"total": len(matches), "hits": matches}})

     hits = [
         {"_id": m._id, "fields": unwrap(jx.select([unwrap(m._source)], query.fields)[0])}
         for m in matches
     ]
     return wrap({"hits": {"total": len(matches), "hits": hits}})
Exemple #19
0
def run(query, frum=None):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: THE QUERY (NORMALIZED WITH QueryOp.wrap)
    :param frum: OPTIONAL DATA TO RUN AGAINST; DEFAULTS TO query["from"]
    :return: THE RESULT, PACKAGED ACCORDING TO query.format
    """
    # frum DEFAULTS TO None, AND MAY BE A PLAIN list/set/generator: NONE OF THOSE HAVE
    # A schema ATTRIBUTE, SO frum.schema USED TO FAIL BEFORE THE BRANCHES BELOW RAN
    # NOTE(review): ASSUMES QueryOp.wrap ACCEPTS None AS THE SCHEMA - CONFIRM
    schema = getattr(frum, "schema", None)
    query = QueryOp.wrap(query, schema)
    frum = coalesce(frum, query["from"])
    if isinstance(frum, Container):
        return frum.query(query)
    elif isinstance(frum, (list, set, GeneratorType)):
        frum = wrap(list(frum))
    elif isinstance(frum, Cube):
        if is_aggs(query):
            return cube_aggs(frum, query)
        # A NON-AGGREGATE QUERY ON A Cube FALLS THROUGH TO THE GENERAL CASE BELOW
    elif isinstance(frum, QueryOp):
        frum = run(frum)
    else:
        Log.error("Do not know how to handle {{type}}",  type=frum.__class__.__name__)

    if is_aggs(query):
        frum = list_aggs(frum, query)
    else:  # SETOP
        if query.where is not TRUE_FILTER:
            frum = filter(frum, query.where)

        if query.sort:
            frum = sort(frum, query.sort, already_normalized=True)

        if query.select:
            frum = select(frum, query.select)

    if query.window:
        if isinstance(frum, Cube):
            frum = list(frum.values())

        for param in query.window:
            window(frum, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query.format == "cube":
        frum = convert.list2cube(frum)
    elif query.format == "table":
        frum = convert.list2table(frum)
        frum.meta.format = "table"
    else:
        frum = wrap({
            "meta": {"format": "list"},
            "data": frum
        })

    return frum
Exemple #20
0
        def iter(data, depth):
            """
            YIELD THE WRAPPED ITEMS FOUND depth LEVELS OF .values() BELOW data
            """
            if depth != 0:
                # DESCEND ONE LEVEL THROUGH EVERY VALUE
                for child in data.values():
                    for leaf in iter(child, depth - 1):
                        yield wrap(leaf)
            else:
                for item in data:
                    yield wrap(item)
Exemple #21
0
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        :param select: A SINGLE select CLAUSE (VALUE CUBE) OR A list OF THEM
        :param edges: THE EDGES; WHEN FALSY THEY ARE DERIVED FROM THE SHAPE OF data
        :param data: dict OF Matrix, OR A list, Matrix OR SINGLE VALUE FOR A VALUE CUBE
        :param frum: NOT USED IN THIS BODY
        """

        self.is_value = not isinstance(select, list)
        self.select = select
        self.meta = Dict(format="cube")  # PUT EXTRA MARKUP HERE
        self.is_none = False

        if not all(data.values()):
            # RECORD ON THE INSTANCE (THIS USED TO BIND AN UNUSED LOCAL NAME)
            self.is_none = True

        # ENSURE frum IS PROPER FORM
        if isinstance(select, list):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = DictList.EMPTY
            elif isinstance(data, Mapping):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = DictList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.wrap(data)}
                self.edges = wrap(
                    [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}]
                )
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                # NOTE(review): self.edges IS NOT ASSIGNED ON THIS PATH - CONFIRM THAT IS INTENDED
                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = DictList.EMPTY
        else:
            self.edges = wrap(edges)

        self.data = data
 def get_metadata(self):
     """
     RETURN THE CLUSTER METADATA, FETCHING IT (AND THE CLUSTER VERSION) ON FIRST USE

     REPORTS AN ERROR WHEN settings.explore_metadata IS NOT SET
     """
     if not self.settings.explore_metadata:
         Log.error("Metadata exploration has been disabled")
     elif not self.cluster_metadata:
         # NOT CACHED YET
         state = self.get("/_cluster/state")
         self.cluster_metadata = wrap(state.metadata)
         self.cluster_state = wrap(self.get("/"))
         self.version = self.cluster_state.version.number
     return self.cluster_metadata
Exemple #23
0
def list2tab(rows):
    """
    RETURN rows AS TAB-DELIMITED TEXT: A HEADER LINE OF LEAF NAMES, THEN
    ONE LINE PER ROW WITH EACH VALUE ENCODED AS JSON
    """
    columns = set()
    for row in wrap(rows):
        columns |= {name for name, value in row.leaves()}
    keys = list(columns)

    body = "\n".join(
        "\t".join(value2json(row[k]) for k in keys)
        for row in wrap(rows)
    )
    return "\t".join(keys) + "\n" + body
Exemple #24
0
    def __getitem__(self, item):
        """
        SELECT PART OF THE CUBE

        :param item: A Mapping OF edge name -> part value (FIXES THOSE EDGES), OR
                     THE NAME OF ONE select COLUMN (RETURNS A VALUE CUBE)
        """
        # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
        # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
        # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
        # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
        if isinstance(item, Mapping):
            coordinates = [None] * len(self.edges)

            # MAP DICT TO NUMERIC INDICES
            for name, v in item.items():
                candidates = wrap([(i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name])
                ei, parts = candidates[0]
                if not parts:
                    Log.error(
                        "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet",
                        name=name,
                        value=v,
                    )
                matching = wrap([p for p in parts if p.value == v])
                part = matching[0]
                if not part:
                    return Null
                coordinates[ei] = part.dataIndex

            # EDGES WITHOUT A FIXED COORDINATE REMAIN IN THE RESULT
            remaining = [e for e, c in zip(self.edges, coordinates) if c is None]
            if not remaining:
                # ZERO DIMENSIONAL VALUE
                return wrap({k: m.__getitem__(coordinates) for k, m in self.data.items()})

            return Cube(
                select=self.select,
                edges=wrap(remaining),
                data={k: Matrix(values=m.__getitem__(coordinates)) for k, m in self.data.items()},
            )

        if not isinstance(item, basestring):
            Log.error("not implemented yet")
            return None

        # RETURN A VALUE CUBE
        if self.is_value:
            if item != self.select.name:
                Log.error("{{name}} not found in cube", name=item)
            return self

        if item not in self.select.name:
            Log.error("{{name}} not found in cube", name=item)

        chosen = [s for s in self.select if s.name == item][0]
        return Cube(select=chosen, edges=self.edges, data={item: self.data[item]})
 def __getitem__(self, key):
     """
     RETURN THE RECORD FOR A COMPLETE key, OR ALL RECORDS MATCHING A PARTIAL key
     """
     try:
         _key = value2key(self._keys, key)
         is_complete = len(self._keys) == 1 or len(_key) == len(self._keys)
         if is_complete:
             return wrap(self._data.get(_key))

         def matches(record):
             # EVERY GIVEN KEY PART MUST AGREE WITH THE RECORD
             return all(wrap(record)[k] == v for k, v in _key.items())

         return wrap([record for record in self._data.values() if matches(record)])
     except Exception as e:
         Log.error("something went wrong", e)
Exemple #26
0
def list2cube(rows, column_names=None):
    """
    CONVERT A LIST OF RECORDS TO CUBE FORM: ONE rownum EDGE, AND ONE LIST OF
    VALUES PER COLUMN

    :param rows: THE RECORDS
    :param column_names: COLUMNS TO INCLUDE; DEFAULTS TO ALL KEYS FOUND IN rows
    """
    keys = column_names
    if not keys:
        found = set()
        for row in rows:
            found |= set(row.keys())
        keys = list(found)

    row_count = len(rows)
    data = {k: [] for k in keys}
    for row in rows:
        for k in keys:
            data[k].append(unwraplist(row[k]))

    rownum_edge = {
        "name": "rownum",
        "domain": {"type": "rownum", "min": 0, "max": row_count, "interval": 1}
    }
    return wrap({
        "meta": {"format": "cube"},
        "edges": [rownum_edge],
        "data": data
    })
Exemple #27
0
    def __init__(self, **desc):
        """
        A DOMAIN OF [min, max) RANGES

        EITHER EXPLICIT partitions (WITH A key) ARE GIVEN, OR THE PARTITIONS ARE
        GENERATED FROM min, max AND interval

        :param desc: DOMAIN DESCRIPTION, PASSED TO Domain.__init__
        """
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                # THE DOMAIN SPANS ALL ITS PARTS
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            # A PART IS NOT COMPARED WITH ITSELF: IT ALWAYS CONTAINS ITS OWN min, SO
            # EVERY NON-EMPTY PART USED TO BE REPORTED AS AN OVERLAP
            for i, p in enumerate(parts):
                for j, q in enumerate(parts):
                    if i != j and p.min <= q.min and q.min < p.max:
                        Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
Exemple #28
0
    def __getattribute__(self, key):
        """
        ATTRIBUTE ACCESS ON A NULL: LOOK UP WHAT THIS NULL STANDS FOR, THEN
        RETURN key FROM IT, OR ANOTHER NullType WHEN THERE IS NOTHING THERE

        :param key: ATTRIBUTE NAME (A BYTE STRING, DECODED AS utf8)
        """
        # REPORT NullType AS THE CLASS, WITHOUT TOUCHING THE STORED STATE
        if key == b"__class__":
            return NullType
        key = key.decode('utf8')

        # _get READS THE INSTANCE __dict__ - PRESUMABLY WITHOUT RE-ENTERING THIS METHOD; CONFIRM
        d = _get(self, "__dict__")
        o = wrap(d["_obj"])  # THE OBJECT THIS NULL WAS TAKEN FROM
        k = d["__key__"]  # THE KEY THAT WAS MISSING IN THAT OBJECT
        if o is None:
            return Null
        elif isinstance(o, NullType):
            return NullType(self, key)
        # THE PARENT MAY HAVE BEEN ASSIGNED A VALUE SINCE THIS NULL WAS MADE
        v = o.get(k)
        if v == None:
            return NullType(self, key)
        return wrap(v).get(key)
Exemple #29
0
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # pausible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; when an exception
               (or a list starting with one) is given here it is taken as the cause instead
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param more_params: *any more parameters (which will overwrite default_params)
        :return: never returns; always raises
        """
        # allow the cause to be given as the second positional argument
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause, default_params = default_params, {}

        params = dict(unwrap(default_params), **more_params)

        causes = [Except.wrap(c, stack_depth=1) for c in listwrap(cause)]
        cause = wrap(unwraplist(causes))
        trace = exceptions.extract_stack(stack_depth + 1)

        raise Except(exceptions.ERROR, template, params, cause, trace)
Exemple #30
0
def json2value(json_string, params={}, flexible=False, leaves=False):
    """
    DECODE A UNICODE JSON STRING INTO A WRAPPED PYTHON VALUE

    :param json_string: THE JSON (MUST BE unicode; A BYTE str IS REPORTED AS AN ERROR)
    :param params: STANDARD JSON PARAMS; WHEN NOT EMPTY THE TEXT IS FIRST PASSED
                   THROUGH expand_template() WITH THESE VALUES
    :param flexible: REMOVE COMMENTS (TRIPLE-QUOTED BLOCKS AND LINE COMMENTS) AND
                     TRAILING COMMAS BEFORE DECODING
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED (RESULT IS PASSED THROUGH wrap_leaves())
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    # NOTE(review): THE HANDLER FOR THIS OUTER try IS NOT VISIBLE IN THIS EXCERPT
    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception, e:
            # REPORT THE (POSSIBLY PRE-PROCESSED) TEXT THAT FAILED TO DECODE
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value
Exemple #31
0
 def as_dict(self):
     """
     RETURN A WRAPPED dict WITH ONE ENTRY PER QueryOp SLOT, HOLDING THIS INSTANCE'S VALUE
     """
     return wrap(dict((slot, getattr(self, slot)) for slot in QueryOp.__slots__))
Exemple #32
0
 def __init__(self, description, param=None, debug=True, silent=False):
     """
     :param description: TEMPLATE TEXT, STORED AS self.template
     :param param: VALUES FOR THE TEMPLATE (DEFAULTS TO AN EMPTY dict), STORED WRAPPED
     :param debug: STORED AS self.debug
     :param silent: STORED AS self.silent
     """
     if param == None:
         template_values = {}
     else:
         template_values = param

     self.template = description
     self.param = wrap(template_values)
     self.debug = debug
     self.silent = silent
     self.interval = None  # NOT KNOWN AT CONSTRUCTION
Exemple #33
0
 def iteritems(self):
     """
     LOW LEVEL ITERATION OVER THE INTERNAL dict: KEYS AS-IS, VALUES WRAPPED
     """
     inner = _get(self, "_dict")
     pairs = inner.iteritems()
     return ((name, wrap(value)) for name, value in pairs)
Exemple #34
0
 def get(self, key, default=None):
     """
     SAME AS dict.get(), BUT THE RESULT (FOUND VALUE OR default) IS WRAPPED
     """
     found = dict.get(self, key, default)
     return wrap(found)
Exemple #35
0
 def iteritems(self):
     """
     LAZILY YIELD (key, wrapped value) PAIRS FROM THE UNDERLYING dict
     """
     for name, raw in dict.iteritems(self):
         wrapped = wrap(raw)
         yield name, wrapped
    def _normalize_job_result(self, branch, revision, job, details, notes,
                              stars):
        """
        REORGANIZE A FLAT job RECORD INTO A NESTED DOCUMENT
        (build, job, machine, repo, details, task, notes, stars, etl)

        :param branch: BRANCH NAME; USED WHEN job HAS NO result_set, AND FOR REVISION LOOKUPS IN NOTES
        :param revision: REVISION; USED WHEN job HAS NO result_set
        :param job: THE FLAT JOB RECORD (A COPY IS MADE, THE CALLER'S OBJECT IS NOT ALTERED HERE)
        :param details: MAPPING FROM job guid TO LIST OF DETAIL RECORDS
        :param notes: MAPPING FROM job id TO LIST OF NOTE RECORDS
        :param stars: MAPPING FROM job id TO LIST OF STAR RECORDS
        :return: THE NORMALIZED Dict
        RAISES (VIA Log.error) WHEN job HAS PROPERTIES THAT ARE NEITHER USED NOR
        EXPLICITLY IGNORED, OR WHEN ANY OTHER STEP FAILS
        """
        output = Dict()
        try:
            job = wrap(copy(job))

            # ORGANIZE PROPERTIES
            # NOTE(review): _scrub() PRESUMABLY REMOVES THE PROPERTY FROM job AS IT
            # RETURNS IT; THE LEFTOVER-KEY CHECK BELOW RELIES ON THAT - CONFIRM
            output.build.architecture = _scrub(job, "build_architecture")
            output.build.os = _scrub(job, "build_os")
            output.build.platform = _scrub(job, "build_platform")
            output.build.type = _scrub(job, "platform_option")

            output.build_system_type = _scrub(job, "build_system_type")

            output.job.id = _scrub(job, "id")
            output.job.guid = _scrub(job, "job_guid")
            if job.job_group_symbol != "?":
                output.job.group.name = _scrub(job, "job_group_name")
                output.job.group.description = _scrub(job,
                                                      "job_group_description")
                output.job.group.symbol = _scrub(job, "job_group_symbol")
            else:
                # "?" MEANS NO GROUP; DROP THE GROUP PROPERTIES
                job.job_group_name = None
                job.job_group_description = None
                job.job_group_symbol = None
            output.job.type.description = _scrub(job, "job_type_description")
            output.job.type.name = _scrub(job, "job_type_name")
            output.job.type.symbol = _scrub(job, "job_type_symbol")

            output.ref_data_name = _scrub(job, "ref_data_name")

            output.machine.name = _scrub(job, "machine_name")
            # A NAME ENDING IN "-<integer>" IS TREATED AS <pool>-<number>
            if Math.is_integer(output.machine.name.split("-")[-1]):
                output.machine.pool = "-".join(
                    output.machine.name.split("-")[:-1])
            output.machine.platform = _scrub(job,
                                             "machine_platform_architecture")
            output.machine.os = _scrub(job, "machine_platform_os")

            output.job.reason = _scrub(job, "reason")
            output.job.state = _scrub(job, "state")
            output.job.tier = _scrub(job, "tier")
            output.job.who = _scrub(job, "who")
            output.job.result = _scrub(job, "result")

            fcid = _scrub(job, "failure_classification_id")
            if fcid not in [0, 1]:  # 0 is unknown, and 1 is "not classified"
                output.job.failure_classification = self.failure_classification.get(
                    fcid)

            # PREFER THE REPO INFO ATTACHED TO THE JOB OVER THE branch/revision PARAMETERS
            if job.result_set:
                output.repo.push_date = job.result_set.push_timestamp
                output.repo.branch = self.repo[job.result_set.repository_id]
                output.repo.revision = job.result_set.revision
            else:
                output.repo.branch = branch
                output.repo.revision = revision
                output.repo.revision12 = revision[:12]
            output.job.timing.submit = Date(_scrub(job, "submit_timestamp"))
            output.job.timing.start = Date(_scrub(job, "start_timestamp"))
            output.job.timing.end = Date(_scrub(job, "end_timestamp"))
            output.job.timing.last_modified = Date(_scrub(
                job, "last_modified"))

            # IGNORED
            job.job_group_id = None
            job.job_type_id = None
            job.result_set = None
            job.build_platform_id = None
            job.job_coalesced_to_guid = None
            job.option_collection_hash = None
            job.platform = None
            job.result_set_id = None
            job.running_eta = None
            job.signature = None

            # ANYTHING STILL IN job IS A PROPERTY THIS CODE DOES NOT KNOW ABOUT
            if job.keys():
                Log.error("{{names|json}} are not used", names=job.keys())

            # ATTACH DETAILS (AND SCRUB OUT REDUNDANT VALUES)
            output.details = details.get(output.job.guid, Null)
            for d in output.details:
                d.job_guid = None
                d.job_id = None

            # FIRST TASK ID THAT CAN BE EXTRACTED FROM THE DETAIL URLS
            output.task.id = coalesce(
                *map(_extract_task_id, output.details.url))

            # ATTACH NOTES (RESOLVED BY BUG...)
            for n in notes.get(output.job.id, Null):
                note = coalesce(n.note.strip(), n.text.strip())
                if note:
                    # LOOK UP REVISION IN REPO
                    # (THE FIRST RUN OF 12 HEX CHARACTERS IS TAKEN AS A CHANGESET ID)
                    fix = re.findall(r'[0-9A-Fa-f]{12}', note)
                    if fix:
                        rev = self.hg.get_revision(
                            Dict(changeset={"id": fix[0]},
                                 branch={"name": branch}))
                        n.revision = rev.changeset.id
                        n.bug_id = self.hg._extract_bug_id(
                            rev.changeset.description)
                else:
                    note = None

                output.notes += [{
                    "note":
                    note,
                    "status":
                    coalesce(n.active_status, n.status),
                    "revision":
                    n.revision,
                    "bug_id":
                    n.bug_id,
                    "who":
                    n.who,
                    "failure_classification":
                    self.failure_classification[n.failure_classification_id],
                    "timestamp":
                    Date(coalesce(n.note_timestamp, n.timestamp, n.created))
                }]

            # ATTACH STAR INFO
            for s in stars.get(output.job.id, Null):
                # LOOKUP BUG DETAILS
                output.stars += [{
                    "bug_id": s.bug_id,
                    "who": s.who,
                    "timestamp": s.submit_timestamp
                }]

            output.etl = {"timestamp": Date.now()}
            return output
        except Exception, e:
            # output.job.id MAY NOT BE SET YET, SO FALL BACK TO THE RAW job.id
            Log.error("Problem with normalization of job {{job_id}}",
                      job_id=coalesce(output.job.id, job.id),
                      cause=e)
Exemple #37
0
def DataClass(name, columns):
    """
    GENERATE PYTHON SOURCE FOR A SLOTTED, Mapping-BASED CLASS AND HAND IT TO _exec()

    Each column has {"name", "required", "nulls", "default", "type"} properties
    A COLUMN GIVEN AS A PLAIN STRING IS SHORTHAND FOR A REQUIRED, NON-NULL
    COLUMN OF TYPE object

    :param name: NAME OF THE GENERATED CLASS
    :param columns: LIST OF COLUMN NAMES AND/OR COLUMN DESCRIPTIONS
    :return: WHATEVER _exec(code, name) RETURNS (PRESUMABLY THE NEW CLASS - CONFIRM)
    """

    # EXPAND STRING SHORTHAND INTO FULL COLUMN DESCRIPTIONS
    columns = wrap([{
        "name": c,
        "required": True,
        "nulls": False,
        "type": object
    } if isinstance(c, basestring) else c for c in columns])
    slots = columns.name
    # REQUIRED MEANS: MARKED required, NOT NULLABLE, AND HAS NO DEFAULT
    required = wrap(
        filter(lambda c: c.required and not c.nulls and not c.default,
               columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    types = {c.name: coalesce(c.type, object) for c in columns}

    # NOTE: "{"+"{x}}" INSIDE THE TEMPLATE KEEPS expand_template() FROM TREATING
    # THE GENERATED CODE'S OWN LOG PLACEHOLDERS AS TEMPLATE PARAMETERS
    # NOTE(review): "nulls" IS PASSED BELOW, BUT THE TEMPLATE HAS NO {{nulls}} PLACEHOLDER
    code = expand_template(
        """
from __future__ import unicode_literals
from collections import Mapping

meta = None
types_ = {{types}}

class {{name}}(Mapping):
    __slots__ = {{slots}}

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            setattr(self, s, kwargs.get(s, kwargs.get('default', Null)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        #if not isinstance(value, types_[item]):
        #    Log.error("{"+"{item|quote}} not of type "+"{"+"{type}}", item=item, type=types_[item])
        object.__setattr__(self, item, value)

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})

temp = {{name}}
""", {
            "name":
            name,
            # TUPLE LITERAL OF QUOTED SLOT NAMES
            "slots":
            "(" + (", ".join(convert.value2quote(s) for s in slots)) + ")",
            # SET LITERAL OF QUOTED REQUIRED NAMES
            "required":
            "{" + (", ".join(convert.value2quote(s) for s in required)) + "}",
            "nulls":
            "{" + (", ".join(convert.value2quote(s) for s in nulls)) + "}",
            "len_slots":
            len(slots),
            # dict LITERAL MAPPING EACH SLOT NAME TO self.<slot>
            "dict":
            "{" +
            (", ".join(convert.value2quote(s) + ": self." + s
                       for s in slots)) + "}",
            # ONE _set(output, <slot>, self.<slot>) STATEMENT PER SLOT, FOR __copy__
            "assign":
            "; ".join(
                "_set(output, " + convert.value2quote(s) + ", self." + s + ")"
                for s in slots),
            "types":
            "{" + (",".join(
                convert.string2quote(k) + ": " + v.__name__
                for k, v in types.items())) + "}"
        })

    return _exec(code, name)
Exemple #38
0
    def pe_filter(filter, data, depth):
        """
        PARTIAL EVALUATE THE filter BASED ON data GIVEN

        EVERY CLAUSE THAT CAN BE DECIDED FROM data IS REPLACED BY ITS RESULT;
        CLAUSES THAT REFER DEEPER THAN data ARE KEPT, REWRITTEN AGAINST THE
        REMAINING PATH

        :param filter: AN esfilter (and, or, not, term/eq, equal, terms, range,
                       missing, prefix, exists), OR TRUE_FILTER / FALSE_FILTER
        :param data: THE RECORD TO EVALUATE AGAINST
        :param depth: PASSED THROUGH TO parse_field() AND TO RECURSIVE CALLS
        :return: True, False, OR A REDUCED FILTER dict STILL TO BE EVALUATED
        NOTE(review): parse_field() IS ASSUMED TO RETURN (first, rest), WHERE AN
        EMPTY rest MEANS THE FIELD IS FULLY RESOLVABLE IN data - CONFIRM
        """
        if filter is TRUE_FILTER:
            return True
        if filter is FALSE_FILTER:
            return False

        filter = wrap(filter)

        if filter["and"]:
            # ANY False CLAUSE MAKES THE RESULT False; True CLAUSES ARE DROPPED
            # (ALL CLAUSES ARE STILL EVALUATED, THERE IS NO EARLY EXIT)
            result = True
            output = DictList()
            for a in filter[u"and"]:
                f = pe_filter(a, data, depth)
                if f is False:
                    result = False
                elif f is not True:
                    output.append(f)
            if result and output:
                return {"and": output}
            else:
                return result
        elif filter["or"]:
            # ANY True CLAUSE DECIDES THE RESULT IMMEDIATELY; False CLAUSES ARE DROPPED
            output = DictList()
            for o in filter[u"or"]:
                f = pe_filter(o, data, depth)
                if f is True:
                    return True
                elif f is not False:
                    output.append(f)
            if output:
                return {"or": output}
            else:
                return False
        elif filter["not"]:
            f = pe_filter(filter["not"], data, depth)
            if f is True:
                return False
            elif f is False:
                return True
            else:
                return {"not": f}
        elif filter.term or filter.eq:
            eq = coalesce(filter.term, filter.eq)
            result = True
            output = {}
            for col, val in eq.items():
                first, rest = parse_field(col, data, depth)
                d = data[first]
                if not rest:
                    if d != val:
                        result = False
                else:
                    # CAN NOT DECIDE YET; KEEP THE TEST AGAINST THE REMAINING PATH
                    output[rest] = val

            if result and output:
                return {"term": output}
            else:
                return result
        elif filter.equal:
            # COMPARE TWO FIELDS; WHEN ONLY ONE SIDE IS KNOWN, IT BECOMES A term ON THE OTHER
            a, b = filter["equal"]
            first_a, rest_a = parse_field(a, data, depth)
            first_b, rest_b = parse_field(b, data, depth)
            val_a = data[first_a]
            val_b = data[first_b]
            if not rest_a:
                if not rest_b:
                    if val_a != val_b:
                        return False
                    else:
                        return True
                else:
                    return {"term": {rest_b: val_a}}
            else:
                if not rest_b:
                    return {"term": {rest_a: val_b}}
                else:
                    return {"equal": [rest_a, rest_b]}

        elif filter.terms:
            result = True
            output = {}
            for col, vals in filter["terms"].items():
                first, rest = parse_field(col, data, depth)
                d = data[first]
                if not rest:
                    if d not in vals:
                        result = False
                else:
                    output[rest] = vals
            if result and output:
                return {"terms": output}
            else:
                return result

        elif filter.range:
            result = True
            output = {}
            for col, ranges in filter["range"].items():
                first, rest = parse_field(col, data, depth)
                d = data[first]
                if not rest:
                    # NOTE(review): ONLY "gt" ALSO ACCEPTS ITS SYMBOL FORM (">");
                    # gte/lte/lt ARE MATCHED BY WORD ONLY - CONFIRM THIS IS INTENDED
                    for sign, val in ranges.items():
                        if sign in ("gt", ">") and d <= val:
                            result = False
                        if sign == "gte" and d < val:
                            result = False
                        if sign == "lte" and d > val:
                            result = False
                        if sign == "lt" and d >= val:
                            result = False
                else:
                    output[rest] = ranges
            if result and output:
                return {"range": output}
            else:
                return result
        elif filter.missing:
            # missing MAY BE A FIELD NAME, OR AN OBJECT WITH A "field" PROPERTY
            if isinstance(filter.missing, basestring):
                field = filter["missing"]
            else:
                field = filter["missing"]["field"]

            first, rest = parse_field(field, data, depth)
            d = data[first]
            if not rest:
                if d == None:
                    return True
                return False
            else:
                return {"missing": rest}
        elif filter.prefix:
            result = True
            output = {}
            for col, val in filter["prefix"].items():
                first, rest = parse_field(col, data, depth)
                d = data[first]
                if not rest:
                    # A MISSING VALUE NEVER MATCHES A PREFIX
                    if d == None or not d.startswith(val):
                        result = False
                else:
                    output[rest] = val
            if result and output:
                return {"prefix": output}
            else:
                return result

        elif filter.exists:
            # exists MAY BE A FIELD NAME, OR AN OBJECT WITH A "field" PROPERTY
            if isinstance(filter["exists"], basestring):
                field = filter["exists"]
            else:
                field = filter["exists"]["field"]

            first, rest = parse_field(field, data, depth)
            d = data[first]
            if not rest:
                if d != None:
                    return True
                return False
            else:
                return {"exists": rest}
        else:
            Log.error(u"Can not interpret esfilter: {{esfilter}}",
                      {u"esfilter": filter})
Exemple #39
0
def table2list(
        column_names,  # tuple of columns names
        rows  # list of tuples
):
    """
    CONVERT A TABLE (COLUMN NAMES PLUS ROW TUPLES) INTO A WRAPPED LIST OF
    dicts, ONE PER ROW, KEYED BY COLUMN NAME
    """
    records = []
    for row in rows:
        records.append(dict(zip(column_names, row)))
    return wrap(records)
Exemple #40
0
def _normalize_edges(edges, schema=None):
    """
    NORMALIZE EACH EDGE (A SINGLE EDGE IS TREATED AS A LIST OF ONE) AND
    RETURN THE WRAPPED LIST OF RESULTS
    """
    normalized = []
    for edge in listwrap(edges):
        normalized.append(_normalize_edge(edge, schema=schema))
    return wrap(normalized)
Exemple #41
0
def _normalize_select(select, frum, schema=None):
    """
    EXPAND ONE select CLAUSE INTO A LIST OF CANONICAL SELECT COLUMNS

    * NO value, OR "."  => ONE COLUMN PER LEAF OF frum
    * "<name>.*"        => ONE COLUMN PER LEAF OF frum BELOW <name>
    * ANY OTHER STRING  => ONE COLUMN FOR THAT EXPRESSION
    * NON-STRING value  => NOTHING IS ADDED (AN EMPTY LIST IS RETURNED)

    :param select: ONE SELECT COLUMN (A STRING, OR AN OBJECT WITH value, name, aggregate, default)
    :param frum: CONTAINER PROVIDING get_leaves(); IF IT HAS ITS OWN
                 _normalize_select() THE WORK IS DELEGATED TO IT
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS (NOT USED IN THIS FUNCTION)
    :return: AN ARRAY OF SELECT COLUMNS
    RAISES (VIA Log.error) IF `.*` IS APPLIED TO SOMETHING OTHER THAN A PLAIN
    VARIABLE, OR IF ANY RESULTING COLUMN HAS NO NAME
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        canonical = select = Dict(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    # MAP AGGREGATE ALIASES (eg "avg") TO THE CANONICAL NAME, AND PICK UP ITS DEFAULT
    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                   select.aggregate, "none")
    canonical.default = coalesce(
        select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(frum, "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend([
            set_default({
                "name": c.name,
                "value": jx_expression(c.name)
            }, canonical) for c in frum.get_leaves()
        ])
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            base_name = select.value[:-2]
            canonical.name = coalesce(select.name, base_name, select.aggregate)
            # PARSE THE NAME WITHOUT ITS ".*" SUFFIX.  (THIS USED TO SLICE THE
            # select OBJECT ITSELF, WHICH IS NOT A STRING)
            value = jx_expression(base_name)
            if not isinstance(value, Variable):
                # Log.error ALWAYS RAISES, SO NOTHING MORE IS DONE FOR THIS CASE
                Log.error("`*` over general expression not supported yet")

            output.extend([
                set_default(
                    {
                        "name":
                        base_name + "." +
                        literal_field(c.name[len(base_name) + 1:]),
                        "value":
                        jx_expression(c.name)
                    }, canonical) for c in frum.get_leaves()
                if c.name.startswith(base_name + ".")
            ])
        else:
            canonical.name = coalesce(select.name, select.value,
                                      select.aggregate)
            canonical.value = jx_expression(select.value)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
Exemple #42
0
def find_holes(db, table_name, column_name, _range, filter=None):
    """
    FIND HOLES IN A DENSE COLUMN OF INTEGERS
    RETURNS A LIST OF {"min": min, "max": max} OBJECTS

    :param db: DATABASE ACCESS OBJECT (USES quote_column(), query() AND execute())
    :param table_name: TABLE TO SCAN
    :param column_name: THE INTEGER COLUMN TO SCAN
    :param _range: OBJECT WITH min AND max; THE SQL SCANS min THROUGH max - 1
    :param filter: OPTIONAL esfilter RESTRICTING THE ROWS (DEFAULT MATCHES ALL)
    THE SECOND QUERY TRACKS THE PREVIOUS VALUE IN THE @last SESSION VARIABLE
    """
    if not filter:
        filter = {"match_all": {}}

    _range = wrap(_range)

    params = {}
    params["min"] = _range.min
    params["max"] = _range.max - 1
    params["column_name"] = db.quote_column(column_name)
    params["table_name"] = db.quote_column(table_name)
    params["filter"] = esfilter2sqlwhere(db, filter)

    # SMALLEST VALUE, AND ONE PAST THE LARGEST VALUE, FOUND INSIDE THE RANGE
    bounds_sql = """
        SELECT
            min({{column_name}}) `min`,
            max({{column_name}})+1 `max`
        FROM
            {{table_name}} a
        WHERE
            a.{{column_name}} BETWEEN {{min}} AND {{max}} AND
            {{filter}}
    """
    bounds = db.query(bounds_sql, params)[0]

    # WALK THE VALUES IN ORDER; REPORT WHEREVER CONSECUTIVE VALUES DIFFER BY MORE THAN ONE
    db.execute("SET @last={{min}}-1", {"min": _range.min})
    gaps_sql = """
        SELECT
            prev_rev+1 `min`,
            curr_rev `max`
        FROM (
            SELECT
                a.{{column_name}}-@last diff,
                @last prev_rev,
                @last:=a.{{column_name}} curr_rev
            FROM
                {{table_name}} a
            WHERE
                a.{{column_name}} BETWEEN {{min}} AND {{max}} AND
                {{filter}}
            ORDER BY
                a.{{column_name}}
        ) a
        WHERE
            diff>1
    """
    ranges = db.query(gaps_sql, params)

    if ranges:
        # INTERIOR HOLES FOUND; ADD THE HOLE AFTER THE LAST VALUE
        ranges.append({"min": bounds.max, "max": _range.max})
    elif bounds.min:
        # NO INTERIOR HOLES; ONLY THE ENDS MAY BE MISSING
        ranges.append({"min": _range.min, "max": bounds.min})
        ranges.append({"min": bounds.max, "max": _range.max})
    else:
        # NO ROWS AT ALL; THE WHOLE RANGE IS ONE HOLE
        ranges.append(_range)

    return ranges
Exemple #43
0
 def __deepcopy__(self, memo):
     """
     RETURN A WRAPPED DEEP COPY OF THIS dict'S CONTENT

     :param memo: THE MEMO dict SUPPLIED BY copy.deepcopy()
     """
     from copy import deepcopy

     # THE BUILTIN dict HAS NO __deepcopy__ METHOD (CALLING dict.__deepcopy__
     # RAISES AttributeError), SO COPY THE RAW CONTENT THROUGH A PLAIN dict
     return wrap(deepcopy(dict(self), memo))
 def __iter__(self):
     """
     ITERATE OVER THE VALUES (NOT THE KEYS) OF self._data, EACH ONE WRAPPED
     """
     raw_values = self._data.itervalues()
     return (wrap(item) for item in raw_values)
Exemple #45
0
 def items(self):
     """
     RETURN A LIST OF (key, wrapped value) PAIRS, LEAVING OUT ENTRIES WHOSE
     VALUE COMPARES EQUAL TO None (UNLESS THAT VALUE IS A Mapping)
     """
     pairs = []
     for name, raw in dict.items(self):
         if raw != None or isinstance(raw, Mapping):
             pairs.append((name, wrap(raw)))
     return pairs
Exemple #46
0
    def _convert_query(self, query):
        """
        CONVERT A QUERY DESCRIPTION INTO A QueryOp, CONVERTING EACH CLAUSE
        (from, select, edges/groupby, where, window, sort, limit, having)

        :param query: THE QUERY (dict-LIKE)
        :return: THE POPULATED QueryOp
        RAISES (VIA Log.error) WHEN BOTH groupby AND edges ARE GIVEN, WHEN limit
        IS NOT A NON-NEGATIVE INTEGER, OR WHEN A NON-where VARIABLE REFERS TO A
        NESTED COLUMN
        """
        query = wrap(query)

        output = QueryOp("from", None)
        output["from"] = self._convert_from(query["from"])
        output.format = query.format

        # SELECT: USE WHAT WAS GIVEN, OTHERWISE DEFAULT TO A count (FOR
        # AGGREGATING QUERIES) OR TO ALL COLUMNS
        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        elif query.edges or query.groupby:
            output.select = {
                "name": "count",
                "value": ".",
                "aggregate": "count",
                "default": 0
            }
        else:
            output.select = {
                "name": "__all__",
                "value": "*",
                "aggregate": "none"
            }

        # edges AND groupby ARE MUTUALLY EXCLUSIVE
        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.  VARIABLES IN THE where CLAUSE ARE EXCLUDED
        referenced = get_all_vars(output, exclude_where=True)
        for column in query.columns:
            if column.name in referenced and column.nested_path:
                Log.error("This query, with variable {{var_name}} is too deep",
                          var_name=column.name)

        output.having = convert_list(self._convert_having, query.having)

        return output
Exemple #47
0
 def __deepcopy__(self, memo):
     """
     DEEP COPY THE INTERNAL dict AND RETURN THE COPY WRAPPED

     :param memo: THE MEMO dict SUPPLIED BY copy.deepcopy()
     """
     inner = _get(self, "_dict")
     duplicate = deepcopy(inner, memo)
     return wrap(duplicate)
Exemple #48
0
 def add(self, message):
     """
     SERIALIZE message AS JSON AND WRITE IT TO self.queue INSIDE A NEW Message

     :param message: THE VALUE TO SEND
     """
     payload = wrap(message)
     envelope = Message()
     envelope.set_body(convert.value2json(payload))
     self.queue.write(envelope)
Exemple #49
0
 def items(self):
     """
     RETURN A LIST OF (key, wrapped value) PAIRS FROM THE INTERNAL dict,
     LEAVING OUT ENTRIES WHOSE VALUE COMPARES EQUAL TO None (UNLESS THAT
     VALUE IS A Mapping)
     """
     inner = _get(self, "_dict")
     pairs = []
     for name, raw in inner.items():
         if raw != None or isinstance(raw, Mapping):
             pairs.append((name, wrap(raw)))
     return pairs
Exemple #50
0
def list_aggs(frum, query):
    """
    AGGREGATE THE ROWS OF frum OVER THE EDGES OF query, IN MEMORY

    :param frum: THE LIST OF ROWS TO AGGREGATE
    :param query: THE QUERY (USES select, edges AND where)
    :return: A Cube BUILT FROM THE SELECT CLAUSES, THE EDGES, AND ONE Matrix OF
             AGGREGATES PER SELECT CLAUSE
    NOTE: EDGES WITH A DefaultDomain ARE ALTERED IN PLACE (domain AND allowNulls)
    """
    frum = wrap(frum)
    select = listwrap(query.select)

    # REPLACE DEFAULT DOMAINS WITH THE SET OF VALUES ACTUALLY SEEN IN THE DATA
    for e in query.edges:
        if isinstance(e.domain, DefaultDomain):
            accessor = jx_expression_to_function(e.value)
            unique_values = set(map(accessor, frum))
            if None in unique_values:
                e.allowNulls = coalesce(e.allowNulls, True)
                unique_values -= {None}
            e.domain = SimpleSetDomain(partitions=list(sorted(unique_values)))
        else:
            pass

    s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select]

    # ONE Matrix OF ACCUMULATORS PER SELECT; ONE DIMENSION PER EDGE, WITH AN
    # EXTRA PARTITION WHEN THE EDGE ALLOWS NULLS
    # NOTE(review): THE zeros LAMBDA CLOSES OVER s; IF Matrix CALLS IT AFTER THIS
    # COMPREHENSION HAS FINISHED, EVERY CALL WILL SEE THE LAST s - CONFIRM
    result = {
        s.name: Matrix(
            dims=[len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges],
            zeros=lambda: windows.name2accumulator.get(s.aggregate)(**s)
        )
        for s in select
    }
    where = jx_expression_to_function(query.where)
    coord = [None]*len(query.edges)
    edge_accessor = [(i, make_accessor(e)) for i, e in enumerate(query.edges)]

    # EDGE NAMES THAT ARE NOT ALREADY VARIABLES OF THE EDGE EXPRESSIONS
    net_new_edge_names = set(wrap(query.edges).name) - UNION(e.value.vars() for e in query.edges)
    if net_new_edge_names & UNION(ss.value.vars() for ss in select):
        # s_accessor NEEDS THESE EDGES, SO WE PASS THEM ANYWAY
        for d in filter(where, frum):
            d = d.copy()  # THE ROW IS ANNOTATED BELOW, SO WORK ON A COPY
            for c, get_matches in edge_accessor:
                coord[c] = get_matches(d)

            for s_name, s_accessor in s_accessors:
                mat = result[s_name]
                # VISIT EVERY CELL THIS ROW BELONGS TO
                for c in itertools.product(*coord):
                    acc = mat[c]
                    for e, cc in zip(query.edges, c):
                        d[e.name] = e.domain.partitions[cc]
                    val = s_accessor(d, c, frum)
                    acc.add(val)
    else:
        # FASTER
        for d in filter(where, frum):
            for c, get_matches in edge_accessor:
                coord[c] = get_matches(d)

            for s_name, s_accessor in s_accessors:
                mat = result[s_name]
                for c in itertools.product(*coord):
                    acc = mat[c]
                    val = s_accessor(d, c, frum)
                    acc.add(val)

    # REPLACE EACH ACCUMULATOR WITH ITS FINAL VALUE
    for s in select:
        # if s.aggregate == "count":
        #     continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    from pyLibrary.queries.containers.cube import Cube

    output = Cube(select, query.edges, result)
    return output
Exemple #51
0
        zip = ZIP_REQUEST

    if isinstance(url, unicode):
        # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
        url = url.encode("ascii")

    _to_ascii_dict(kwargs)
    timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'),
                                            default_timeout)

    if retry == None:
        retry = Dict(times=1, sleep=0)
    elif isinstance(retry, Number):
        retry = Dict(times=retry, sleep=1)
    else:
        retry = wrap(retry)
        if isinstance(retry.sleep, Duration):
            retry.sleep = retry.sleep.seconds
        set_default(retry, {"times": 1, "sleep": 0})

    if b'json' in kwargs:
        kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8")
        del kwargs[b'json']

    try:
        headers = kwargs[b"headers"] = unwrap(
            coalesce(wrap(kwargs)[b"headers"], {}))
        set_default(headers, {b"accept-encoding": b"compress, gzip"})

        if zip and len(coalesce(kwargs.get(b"data"))) > 1000:
            compressed = convert.bytes2zip(kwargs[b"data"])
Exemple #52
0
 def assign(source, destination):
     """
     EVALUATE field AGAINST THE WRAPPED source AND STORE THE RESULT IN
     destination[name]; ALWAYS RETURNS (0, None)
     """
     wrapped_source = wrap(source)
     destination[name] = field.value(wrapped_source)
     return (0, None)
Exemple #53
0
            seq = list(sequence)
            seq.append(row)
            output.append(seq)
            return
        elif new_filter is False:
            return

        seq = list(sequence)
        seq.append(row)
        for d in primary_branch[depth]:
            main(seq, new_filter, d, depth + 1)

    # OUTPUT
    for i, d in enumerate(data):
        if isinstance(d, Mapping):
            main([], esfilter, wrap(d), 0)
        else:
            Log.error("filter is expecting a dict, not {{type}}",
                      type=d.__class__)

    # AT THIS POINT THE primary_column[] IS DETERMINED
    # USE IT TO EXPAND output TO ALL NESTED OBJECTS
    max = 0  # EVEN THOUGH A ROW CAN HAVE MANY VALUES, WE ONLY NEED UP TO max
    for i, n in enumerate(primary_nested):
        if n:
            max = i + 1

    # OUTPUT IS A LIST OF ROWS,
    # WHERE EACH ROW IS A LIST OF VALUES SEEN DURING A WALK DOWN A PATH IN THE HIERARCHY
    uniform_output = DictList()
Exemple #54
0
def get_instance_metadata():
    """
    RETURN boto's INSTANCE METADATA AS A WRAPPED dict, WITH EVERY "-" IN A
    KEY REPLACED BY "_"
    """
    renamed = {}
    for key, value in boto_utils.get_instance_metadata().items():
        renamed[key.replace("-", "_")] = value
    return wrap(renamed)
    {"a": "x", "t": Date("today-2day").unix, "v": 3},
    {"a": "x", "t": Date("today-3day").unix, "v": 5},
    {"a": "x", "t": Date("today-4day").unix, "v": 7},
    {"a": "x", "t": Date("today-5day").unix, "v": 11},
    {"a": "x", "t": NULL, "v": 27},
    {"a": "y", "t": Date("today-day").unix, "v": 13},
    {"a": "y", "t": Date("today-2day").unix, "v": 17},
    {"a": "y", "t": Date("today-4day").unix, "v": 19},
    {"a": "y", "t": Date("today-5day").unix, "v": 23}
]

# EXPECTED ROWS: ONE PER DAY FROM (TODAY - WEEK) THROUGH (TODAY - 1 * DAY),
# PLUS A FINAL ROW WITH NO "t"
# NOTE(review): THE LAST ROW PRESUMABLY COLLECTS THE RECORDS WHOSE t IS NULL - CONFIRM AGAINST THE TEST THAT USES IT
expected_list_1 = wrap([
    {"t": (TODAY - WEEK).unix, "v": NULL},
    {"t": (TODAY - 6 * DAY).unix, "v": NULL},
    {"t": (TODAY - 5 * DAY).unix, "v": 34},
    {"t": (TODAY - 4 * DAY).unix, "v": 26},
    {"t": (TODAY - 3 * DAY).unix, "v": 5},
    {"t": (TODAY - 2 * DAY).unix, "v": 20},
    {"t": (TODAY - 1 * DAY).unix, "v": 15},
    {"v": 29}
])

expected2 = wrap([
    {"a": "x", "t": (TODAY - WEEK).unix, "v": NULL},
    {"a": "x", "t": (TODAY - 6 * DAY).unix, "v": NULL},
    {"a": "x", "t": (TODAY - 5 * DAY).unix, "v": 11},
    {"a": "x", "t": (TODAY - 4 * DAY).unix, "v": 7},
    {"a": "x", "t": (TODAY - 3 * DAY).unix, "v": 5},
    {"a": "x", "t": (TODAY - 2 * DAY).unix, "v": 3},
    {"a": "x", "t": (TODAY - 1 * DAY).unix, "v": 2},
    {"a": "x", "v": 29},
    {"a": "y", "t": (TODAY - WEEK).unix, "v": NULL},
Exemple #56
0
    def as_dict(self):
        """
        RETURN A WRAPPED dict WITH ONE ENTRY PER QueryOp SLOT, HOLDING THIS INSTANCE'S VALUE
        """
        slot_values = {}
        for slot in QueryOp.__slots__:
            slot_values[slot] = getattr(self, slot)
        return wrap(slot_values)


# MAPS AN AGGREGATE ALIAS TO ITS CANONICAL "name", AND (FOR count ONLY) THE
# "default" VALUE
canonical_aggregates = wrap({
    "count": {
        "name": "count",
        "default": 0
    },
    "min": {
        "name": "minimum"
    },
    "max": {
        "name": "maximum"
    },
    "add": {
        "name": "sum"
    },
    "avg": {
        "name": "average"
    },
    "mean": {
        "name": "average"
    },
})


def _normalize_selects(
    selects,
    frum,
Exemple #57
0
    def __init__(self, **desc):
        """
        BUILD A "set" DOMAIN FROM THE GIVEN DESCRIPTION

        DEPENDING ON THE SHAPE OF desc.key AND desc.partitions, THIS FILLS IN
        self.key, self.map (A dict OR A UniqueIndex), self.order (KEY VALUE ->
        POSITION), self.partitions AND self.label

        :param desc: DOMAIN DESCRIPTION; key, partitions AND dimension.fields ARE READ HERE
        """
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = DictList()
        self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

        # self.key IS NOT ASSIGNED ABOVE; PRESUMABLY Domain.__init__ SETS IT FROM desc - TODO CONFIRM
        if isinstance(self.key, set):
            Log.error("problem")

        if not desc.key and isinstance(desc.partitions[0],
                                       (basestring, Number)):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.map = {}
            # None IS GIVEN THE POSITION AFTER THE LAST PART
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
            self.label = coalesce(self.label, "name")
            self.primitive = True
            # self.partitions WAS FILLED IN THE LOOP, SO SKIP THE COPY AT THE BOTTOM
            return

        if desc.partitions and desc.dimension.fields and len(
                desc.dimension.fields) > 1:
            # MORE THAN ONE DIMENSION FIELD: INDEX THE PARTS BY ALL THE FIELDS
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.key, (list, set)):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key],
                                            Mapping):
            # THE KEY VALUE OF THE FIRST PART IS ITSELF A Mapping
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif len(desc.partitions) == 0:
            # CREATE AN EMPTY DOMAIN
            self.key = "value"
            self.map = {}
            self.order[None] = 0
            self.label = coalesce(self.label, "name")
            return
        elif desc.key == None:
            if desc.partitions and len(set(desc.partitions.value)) == len(
                    desc.partitions):
                # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
                self.key = "value"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.map[p[self.key]] = p
                    self.order[p[self.key]] = i
                self.primitive = False
            else:
                Log.error("Domains must have keys")
        elif self.key:
            # NOTE(review): THE TEST READS self.key BUT THE ASSIGNMENT USES desc.key - CONFIRM THEY ALWAYS AGREE
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        elif all(p.esfilter for p in self.partitions):
            # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
            # NOTE(review): self.partitions IS STILL THE DictList CREATED ABOVE (NOTHING
            # HAS BEEN APPENDED ON THIS PATH), SO THIS TEST LOOKS VACUOUSLY TRUE AND THE
            # else BELOW UNREACHABLE; desc.partitions MAY HAVE BEEN INTENDED - CONFIRM
            for i, p in enumerate(self.partitions):
                p.dataIndex = i

        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")

        # KEEP A PLAIN list COPY OF THE DESCRIBED PARTITIONS
        if hasattr(desc.partitions, "__iter__"):
            self.partitions = list(desc.partitions)
        else:
            Log.error("expecting a list of partitions")
Exemple #58
0
 def __init__(self, **desc):
     """
     INITIALIZE FROM A DESCRIPTION GIVEN AS KEYWORD ARGUMENTS

     name FALLS BACK TO THE DESCRIPTION'S type; isFacet FALLS BACK TO False
     """
     # PRESUMABLY RESETS EVERY DECLARED SLOT BEFORE THE DESCRIPTION IS APPLIED - TODO CONFIRM
     self._set_slots_to_none(self.__class__)

     description = wrap(desc)
     set_default(self, description)

     self.name = coalesce(description.name, description.type)
     self.isFacet = coalesce(description.isFacet, False)
Exemple #59
0
def _normalize_groupby(groupby, schema=None):
    if groupby == None:
        return None
    return wrap(
        [_normalize_group(e, schema=schema) for e in listwrap(groupby)])
Exemple #60
0
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT

    BUILD THE FILTER THAT MATCHES ONE partition OF THE GIVEN edge

    :param mvel: USED (VIA compile_expression) WHEN A RANGE BOUND IS AN
                 EXPRESSION RATHER THAN A SIMPLE FIELD NAME
    :param edge: THE EDGE; ITS domain, range, value AND name ARE READ
    :param partition: THE PART TO MATCH; min, max, value, where AND esfilter
                      ARE READ, DEPENDING ON THE BRANCH TAKEN
    :return: A FILTER; partition.esfilter IS RETURNED AS-IS WHEN THE EDGE HAS
             NEITHER A range NOR A value
    """
    # A PLAIN dict: IT MUST ONLY BE FILLED BY ITEM ASSIGNMENT (NOT ATTRIBUTE ASSIGNMENT)
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        # NOTE(review): IF wrap() DOES NOT COPY, THE append BELOW ALSO CHANGES partition.where - CONFIRM
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_keyword(edge.value):
            condition["and"].append({
                "range": {
                    edge.value: {
                        "gte": partition.min,
                        "lt": partition.max
                    }
                }
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}
            clauses = output["and"]

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    clauses.append({
                        "range": {
                            edge.range.min: {
                                "lt":
                                es09.expressions.value2value(partition.max)
                            }
                        }
                    })
                else:
                    # WHOA!! SUPER SLOW!!
                    clauses.append({
                        "script": {
                            "script":
                            mvel.compile_expression(
                                edge.range.min + " < " +
                                es09.expressions.value2MVEL(partition.max))
                        }
                    })

                if is_keyword(edge.range.max):
                    clauses.append({
                        "or": [{
                            "missing": {
                                "field": edge.range.max
                            }
                        }, {
                            # FIELD NAME IS THE KEY (WAS A SET DISPLAY HOLDING A dict,
                            # WHICH RAISES TypeError: unhashable type)
                            "range": {
                                edge.range.max: {
                                    "gt":
                                    es09.expressions.value2value(partition.min)
                                }
                            }
                        }]
                    })
                else:
                    # WHOA!! SUPER SLOW!!
                    clauses.append({
                        "script": {
                            "script":
                            mvel.compile_expression(
                                edge.range.max + " > " +
                                es09.expressions.value2MVEL(partition.min))
                        }
                    })

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    clauses.append({
                        "range": {
                            edge.range.min: {
                                "lte":
                                es09.expressions.value2value(partition.min)
                            }
                        }
                    })
                else:
                    # WHOA!! SUPER SLOW!!
                    clauses.append({
                        "script": {
                            "script":
                            mvel.compile_expression(
                                edge.range.min + "<=" +
                                es09.expressions.value2MVEL(partition.min))
                        }
                    })

                if is_keyword(edge.range.max):
                    clauses.append({
                        "or": [{
                            "missing": {
                                "field": edge.range.max
                            }
                        }, {
                            # FIELD NAME IS THE KEY (SAME FIX AS THE INCLUSIVE CASE ABOVE)
                            "range": {
                                edge.range.max: {
                                    "gte":
                                    es09.expressions.value2value(partition.min)
                                }
                            }
                        }]
                    })
                else:
                    # WHOA!! SUPER SLOW!!
                    clauses.append({
                        "script": {
                            "script":
                            mvel.compile_expression(
                                es09.expressions.value2MVEL(partition.min) +
                                " <= " + edge.range.max)
                        }
                    })
            return output
        else:
            Log.error(
                "Do not know how to handle range query on non-continuous domain"
            )

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_keyword(edge.value):
        # USE FAST ES SYNTAX
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {
                edge.value: {
                    "gte": es09.expressions.value2query(partition.min),
                    "lt": es09.expressions.value2query(partition.max)
                }
            }
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error(
                        "please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former"
                    )
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = (
                edge.value + ">=" +
                es09.expressions.value2MVEL(partition.min) + " and " +
                edge.value + "<" + es09.expressions.value2MVEL(partition.max)
            )
        else:
            script = (
                "( " + edge.value + " ) ==" +
                es09.expressions.value2MVEL(partition.value)
            )

        # PREPEND ANY FUNCTION DEFINITIONS THE SCRIPT REQUIRES
        code = es09.expressions.addFunctions(script)
        output["script"] = {"script": code.head + code.body}
        return output