예제 #1
0
def _normalize_range(range):
    if range == None:
        return None

    return Data(min=None if range.min == None else jx_expression(range.min),
                max=None if range.max == None else jx_expression(range.max),
                mode=range.mode)
예제 #2
0
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, _:
                e = None
            e = unwrap(unwraplist(e))
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Data(name=edge,
                                value=jx_expression(edge),
                                allowNulls=True,
                                domain=_normalize_domain(domain=e,
                                                         schema=schema))
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Data(name=e.name,
                                value=jx_expression(e.fields[0]),
                                allowNulls=True,
                                domain=e.getDomain())
                else:
                    return Data(name=e.name,
                                allowNulls=True,
                                domain=e.getDomain())
        return Data(name=edge,
                    value=jx_expression(edge),
                    allowNulls=True,
                    domain=_normalize_domain(schema=schema))
예제 #3
0
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """

    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            output.append({"value": jx_expression(s), "sort": 1})
        elif isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction
                 for d in s.values()) and not s.sort and not s.value:
            for v, d in s.items():
                output.append({
                    "value": jx_expression(v),
                    "sort": sort_direction[d]
                })
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
예제 #4
0
def _normalize_group(edge, schema=None):
    if isinstance(edge, basestring):
        return wrap({
            "name": edge,
            "value": jx_expression(edge),
            "allowNulls": True,
            "domain": {
                "type": "default"
            }
        })
    else:
        edge = wrap(edge)
        if (edge.domain
                and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap({
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value),
            "allowNulls": True,
            "domain": {
                "type": "default"
            }
        })
예제 #5
0
def _normalize_group(edge, dim_index, schema=None):
    """
    :param edge: Not normalized groupby 
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param schema: for context
    :return: a normalized groupby
    """
    if isinstance(edge, basestring):
        if edge.endswith(".*"):
            prefix = edge[:-1]
            if schema:
                output = wrap([{
                    "name": literal_field(k),
                    "value": jx_expression(k),
                    "allowNulls": True,
                    "domain": {
                        "type": "default"
                    }
                } for k, cs in schema.items() if k.startswith(prefix)
                               for c in cs if c.type not in STRUCT])
                return output
            else:
                return wrap([{
                    "name": edge[:-2],
                    "value": jx_expression(edge[:-2]),
                    "allowNulls": True,
                    "dim": dim_index,
                    "domain": {
                        "type": "default"
                    }
                }])

        return wrap([{
            "name": edge,
            "value": jx_expression(edge),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {
                "type": "default"
            }
        }])
    else:
        edge = wrap(edge)
        if (edge.domain
                and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {
                "type": "default"
            }
        }])
예제 #6
0
def _normalize_range(range):
    if range == None:
        return None

    return Dict(
        min=None if range.min == None else jx_expression(range.min),
        max=None if range.max == None else jx_expression(range.max),
    )
예제 #7
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search(
            {
                "fields": listwrap(schema._routing.path),
                "query": {
                    "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()}
                },
                "size": 200000,
            }
        )

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append(
                        {
                            "update": {
                                "_id": h._id,
                                "_routing": unwraplist(h.fields[literal_field(schema._routing.path)]),
                            }
                        }
                    )
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8")
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"consistency": self.settings.consistency},
            )
            if response.errors:
                Log.error(
                    "could not update: {{error}}",
                    error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)],
                )
예제 #8
0
def _normalize_group(edge, dim_index, schema=None):
    """
    :param edge: Not normalized groupby 
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param schema: for context
    :return: a normalized groupby
    """
    if isinstance(edge, basestring):
        if edge.endswith(".*"):
            prefix = edge[:-1]
            if schema:
                output = wrap([
                    {
                        "name": literal_field(k),
                        "value": jx_expression(k),
                        "allowNulls": True,
                        "domain": {"type": "default"}
                    }
                    for k, cs in schema.items()
                    if k.startswith(prefix)
                    for c in cs
                    if c.type not in STRUCT
                ])
                return output
            else:
                return wrap([{
                    "name": edge[:-2],
                    "value": jx_expression(edge[:-2]),
                    "allowNulls": True,
                    "dim":dim_index,
                    "domain": {"type": "default"}
                }])

        return wrap([{
            "name": edge,
            "value": jx_expression(edge),
            "allowNulls": True,
            "dim":dim_index,
            "domain": {"type": "default"}
        }])
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}",  edge= edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value),
            "allowNulls": True,
            "dim":dim_index,
            "domain": {"type": "default"}
        }])
예제 #9
0
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".")
        else:
            return output
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            output.name = coalesce(select.name, select.value[:-2],
                                   select.aggregate)
            output.value = LeavesOp("leaves", Variable(select.value[:-2]))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp("leaves", Variable("."))
            else:
                output.name = coalesce(select.name, select.value,
                                       select.aggregate)
                output.value = jx_expression(select.value)
    elif isinstance(select.value, (int, float)):
        if not output.name:
            output.name = unicode(select.value)
        output.value = jx_expression(select.value)
    else:
        output.value = jx_expression(select.value)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                select.aggregate, "none")
    output.default = coalesce(select.default,
                              canonical_aggregates[output.aggregate].default)
    return output
예제 #10
0
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if isinstance(edge, basestring):
        if schema:
            e = schema[edge]
            if e:
                if isinstance(e, _Column):
                    return Dict(name=edge,
                                value=jx_expression(edge),
                                allowNulls=True,
                                domain=_normalize_domain(schema=schema))
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(name=e.name,
                                value=jx_expression(e.fields[0]),
                                allowNulls=True,
                                domain=e.getDomain())
                else:
                    return Dict(name=e.name,
                                allowNulls=True,
                                domain=e.getDomain())
        return Dict(name=edge,
                    value=jx_expression(edge),
                    allowNulls=True,
                    domain=_normalize_domain(schema=schema))
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound and complex edges: {{edge}}",
                      edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Dict(fields=edge.value)

            return Dict(name=edge.name,
                        allowNulls=bool(coalesce(edge.allowNulls, True)),
                        domain=domain)

        domain = _normalize_domain(edge.domain, schema=schema)

        return Dict(name=coalesce(edge.name, edge.value),
                    value=jx_expression(edge.value),
                    range=_normalize_range(edge.range),
                    allowNulls=bool(coalesce(edge.allowNulls, True)),
                    domain=domain)
예제 #11
0
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".")
        else:
            return output
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            output.name = coalesce(select.name, select.value[:-2], select.aggregate)
            output.value = LeavesOp("leaves", Variable(select.value[:-2]))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp("leaves", Variable("."))
            else:
                output.name = coalesce(select.name, select.value, select.aggregate)
                output.value = jx_expression(select.value)
    elif isinstance(select.value, (int, float)):
        if not output.name:
            output.name = unicode(select.value)
        output.value = jx_expression(select.value)
    else:
        output.value = jx_expression(select.value)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}",  select= select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    output.default = coalesce(select.default, canonical_aggregates[output.aggregate].default)
    return output
예제 #12
0
 def edges_get_all_vars(e):
     output = set()
     if isinstance(e.value, basestring):
         output.add(e.value)
     if e.domain.key:
         output.add(e.domain.key)
     if e.domain.where:
         output |= jx_expression(e.domain.where).vars()
     if e.range:
         output |= jx_expression(e.range.min).vars()
         output |= jx_expression(e.range.max).vars()
     if e.domain.partitions:
         for p in e.domain.partitions:
             if p.where:
                 output |= p.where.vars()
     return output
예제 #13
0
    def where(self, filter):
        """
        WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
        :param filter:  jx_expression filter
        :return: list of objects that match
        """
        select = []
        column_names = []
        for cname, cs in self.columns.items():
            cs = [
                c for c in cs
                if c.type not in ["nested", "object"] and not c.nested_path
            ]
            if len(cs) == 0:
                continue
            column_names.append(cname)
            if len(cs) == 1:
                select.append(
                    quote_table(c.es_column) + " " + quote_table(c.name))
            else:
                select.append("coalesce(" +
                              ",".join(quote_table(c.es_column) for c in cs) +
                              ") " + quote_table(c.name))

        result = self.db.query(" SELECT " + "\n,".join(select) + " FROM " +
                               quote_table(self.name) + " WHERE " +
                               jx_expression(filter).to_sql())
        return wrap([{c: v
                      for c, v in zip(column_names, r)} for r in result.data])
예제 #14
0
 def test_eq2(self):
     where = {"eq": {"a": 1, "b": 2}}
     result = simplify_esfilter(jx_expression(where).to_esfilter())
     if USE_BOOL_MUST:
         self.assertEqual(
             result,
             {"bool": {
                 "must": [{
                     "term": {
                         "a": 1
                     }
                 }, {
                     "term": {
                         "b": 2
                     }
                 }]
             }})
     else:
         self.assertEqual(result,
                          {"and": [{
                              "term": {
                                  "a": 1
                              }
                          }, {
                              "term": {
                                  "b": 2
                              }
                          }]})
예제 #15
0
    def _groupby_op(self, query):
        selects = []
        for s in listwrap(query.select):
            if s.value == "." and s.aggregate == "count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate] + "(" +
                               jx_expression(s.value).to_sql() + ") AS " +
                               quote_table(s.name))

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        agg = " FROM " + quote_table(self.name) + " a\n"
        groupby = ""
        groupby_prefix = " GROUP BY "

        for i, e in enumerate(query.edges):
            value = e.to_sql()
            groupby += groupby_prefix + value
            groupby_prefix = ",\n"

        where = "\nWHERE " + query.where.to_sql()

        return "SELECT " + (",\n".join(selects)) + agg + where + groupby
예제 #16
0
 def edges_get_all_vars(e):
     output = set()
     if isinstance(e.value, basestring):
         output.add(e.value)
     if e.domain.key:
         output.add(e.domain.key)
     if e.domain.where:
         output |= jx_expression(e.domain.where).vars()
     if e.range:
         output |= jx_expression(e.range.min).vars()
         output |= jx_expression(e.range.max).vars()
     if e.domain.partitions:
         for p in e.domain.partitions:
             if p.where:
                 output |= p.where.vars()
     return output
    def where(self, filter):
        """
        WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
        :param filter:  jx_expression filter
        :return: list of objects that match
        """
        select = []
        column_names = []
        for cname, cs in self.columns.items():
            cs = [c for c in cs if c.type not in ["nested", "object"] and not c.nested_path]
            if len(cs) == 0:
                continue
            column_names.append(cname)
            if len(cs) == 1:
                select.append(quote_table(c.es_column) + " " + quote_table(c.name))
            else:
                select.append(
                    "coalesce(" +
                    ",".join(quote_table(c.es_column) for c in cs) +
                    ") " + quote_table(c.name)
                )

        result = self.db.query(
            " SELECT " + "\n,".join(select) +
            " FROM " + quote_table(self.name) +
            " WHERE " + jx_expression(filter).to_sql()
        )
        return wrap([{c: v for c, v in zip(column_names, r)} for r in result.data])
예제 #18
0
def es_fieldop(es, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for s in select.value:
        if s == "*":
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{
        s.field: "asc" if s.sort >= 0 else "desc"
    } for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{
                k: unwrap(t.fields).get(v, None)
                for k, v in s.value.items()
            } for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([
                tuple(unwrap(t.fields).get(ss, None) for ss in s.value)
                for t in T
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap(
                [unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap(
                    [unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube
예제 #19
0
def groupby(data,
            keys=None,
            size=None,
            min_size=None,
            max_size=None,
            contiguous=False):
    """
    :param data:
    :param keys:
    :param size:
    :param min_size:
    :param max_size:
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: return list of (keys, values) PAIRS, WHERE
                 keys IS IN LEAF FORM (FOR USE WITH {"eq": terms} OPERATOR
                 values IS GENERATOR OF ALL VALUE THAT MATCH keys
        contiguous -
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from pyLibrary.queries import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        accessor = jx_expression_to_function(jx_expression(
            {"tuple":
             keys}))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

        def _output():
            start = 0
            prev = accessor(data[0])
            for i, d in enumerate(data):
                curr = accessor(d)
                if curr != prev:
                    group = {}
                    for k, gg in zip(keys, prev):
                        group[k] = gg
                    yield Dict(group), data[start:i:]
                    start = i
                    prev = curr
            group = {}
            for k, gg in zip(keys, prev):
                group[k] = gg
            yield Dict(group), data[start::]

        return _output()
    except Exception, e:
        Log.error("Problem grouping", cause=e)
예제 #20
0
    def test_range_packing1(self):
        where = {"and": [
            {"gt": {"a": 20}},
            {"lt": {"a": 40}}
        ]}

        result = simplify_esfilter(jx_expression(where).to_esfilter())
        self.assertEqual(result, {"range": {"a": {"gt": 20, "lt": 40}}})
예제 #21
0
    def test_range_packing2(self):
        where = {"and": [
            {"gte": {"build.date": 1429747200}},
            {"lt": {"build.date": 1429920000}}
        ]}

        result = simplify_esfilter(jx_expression(where).to_esfilter())
        self.assertEqual(result, {"range": {"build.date": {"gte": Date("23 APR 2015").unix, "lt": Date("25 APR 2015").unix}}})
예제 #22
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": listwrap(schema._routing.path),
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": jx_expression(command.where).to_esfilter()
            }},
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"consistency": self.settings.consistency}
            )
            if response.errors:
                Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
예제 #23
0
 def test_value_not_a_variable(self):
     result = jx_expression({
         "eq": {
             "result.test": "/XMLHttpRequest/send-entity-body-document.htm"
         }
     }).vars()
     expected = {"result.test"}
     self.assertEqual(result, expected,
                      "expecting the one and only variable")
예제 #24
0
    def wrap(query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if isinstance(query, QueryOp) or query == None:
            return query

        query = wrap(query)

        output = QueryOp("from", None)
        output.format = query.format
        output.frum = wrap_from(query["from"], schema=schema)
        if not schema and isinstance(output.frum, Schema):
            schema = output.frum
        if not schema and hasattr(output.frum, "schema"):
            schema = output.frum.schema

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = Data(name="count",
                                     value=jx_expression("."),
                                     aggregate="count",
                                     default=0)
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit,
                                                    DEFAULT_LIMIT))
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
예제 #25
0
def _normalize_group(edge, schema=None):
    if isinstance(edge, basestring):
        return wrap({"name": edge, "value": jx_expression(edge), "allowNulls": True, "domain": {"type": "default"}})
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap(
            {
                "name": coalesce(edge.name, edge.value),
                "value": jx_expression(edge.value),
                "allowNulls": True,
                "domain": {"type": "default"},
            }
        )
예제 #26
0
    def where(self, where):
        temp = None
        if isinstance(where, Mapping):
            exec("def temp(row):\n    return "+jx_expression(where).to_python())
        elif isinstance(where, Expression):
            temp = compile_expression(where.to_python())
        else:
            temp = where

        return ListContainer("from "+self.name, filter(temp, self.data), self.schema)
예제 #27
0
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    :param data:
    :param keys:
    :param size:
    :param min_size:
    :param max_size:
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: return list of (keys, values) PAIRS, WHERE
                 keys IS IN LEAF FORM (FOR USE WITH {"eq": terms} OPERATOR
                 values IS GENERATOR OF ALL VALUE THAT MATCH keys
        contiguous -
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from pyLibrary.queries import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        if any(isinstance(k, Expression) for k in keys):
            Log.error("can not handle expressions")
        else:
            accessor = jx_expression_to_function(jx_expression({"tuple": keys}))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

        def _output():
            start = 0
            prev = accessor(data[0])
            for i, d in enumerate(data):
                curr = accessor(d)
                if curr != prev:
                    group = {}
                    for k, gg in zip(keys, prev):
                        group[k] = gg
                    yield Data(group), data[start:i:]
                    start = i
                    prev = curr
            group = {}
            for k, gg in zip(keys, prev):
                group[k] = gg
            yield Data(group), data[start::]

        return _output()
    except Exception as e:
        Log.error("Problem grouping", cause=e)
예제 #28
0
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """

    if not sort:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            output.append({"value": jx_expression(s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": -1})
        else:
            output.append(
                {"value": jx_expression(coalesce(s.value, s.field)), "sort": coalesce(sort_direction[s.sort], 1)}
            )
    return output
예제 #29
0
    def where(self, where):
        temp = None
        if isinstance(where, Mapping):
            exec("def temp(row):\n    return " +
                 jx_expression(where).to_python())
        elif isinstance(where, Expression):
            temp = compile_expression(where.to_python())
        else:
            temp = where

        return ListContainer("from " + self.name, filter(temp, self.data),
                             self.schema)
예제 #30
0
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, e:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Data(
                        name=edge,
                        value=jx_expression(edge),
                        allowNulls=True,
                        domain=_normalize_domain(domain=e, schema=schema)
                    )
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Data(
                        name=e.name,
                        value=jx_expression(e.fields[0]),
                        allowNulls=True,
                        domain=e.getDomain()
                    )
                else:
                    return Data(
                        name=e.name,
                        allowNulls=True,
                        domain=e.getDomain()
                    )
        return Data(
            name=edge,
            value=jx_expression(edge),
            allowNulls=True,
            domain=_normalize_domain(schema=schema)
        )
예제 #31
0
def _normalize_group(edge, schema=None):
    if isinstance(edge, basestring):
        if edge.endswith(".*"):
            prefix = edge[:-1]
            output = wrap([{
                "name": literal_field(k),
                "value": jx_expression(c.es_column),
                "allowNulls": True,
                "domain": {
                    "type": "default"
                }
            } for k, cs in schema.lookup.items() if k.startswith(prefix)
                           for c in cs if c.type not in STRUCT])
            return output

        return wrap([{
            "name": edge,
            "value": jx_expression(edge),
            "allowNulls": True,
            "domain": {
                "type": "default"
            }
        }])
    else:
        edge = wrap(edge)
        if (edge.domain
                and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value),
            "allowNulls": True,
            "domain": {
                "type": "default"
            }
        }])
예제 #32
0
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if isinstance(edge, basestring):
        if schema:
            e = schema[edge]
            if e:
                if isinstance(e, _Column):
                    return Dict(
                        name=edge, value=jx_expression(edge), allowNulls=True, domain=_normalize_domain(schema=schema)
                    )
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(name=e.name, value=jx_expression(e.fields[0]), allowNulls=True, domain=e.getDomain())
                else:
                    return Dict(name=e.name, allowNulls=True, domain=e.getDomain())
        return Dict(name=edge, value=jx_expression(edge), allowNulls=True, domain=_normalize_domain(schema=schema))
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Dict(fields=edge.value)

            return Dict(name=edge.name, allowNulls=bool(coalesce(edge.allowNulls, True)), domain=domain)

        domain = _normalize_domain(edge.domain, schema=schema)

        return Dict(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            domain=domain,
        )
예제 #33
0
def _normalize_window(window, schema=None):
    v = window.value
    try:
        expr = jx_expression(v)
    except Exception:
        expr = ScriptOp("script", v)

    return Dict(
        name=coalesce(window.name, window.value),
        value=expr,
        edges=[_normalize_edge(e, schema) for e in listwrap(window.edges)],
        sort=_normalize_sort(window.sort),
        aggregate=window.aggregate,
        range=_normalize_range(window.range),
        where=_normalize_where(window.where, schema=schema))
예제 #34
0
def es_fieldop(es, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for s in select.value:
        if s == "*":
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube
예제 #35
0
    def wrap(query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if isinstance(query, QueryOp) or query == None:
            return query

        query = wrap(query)

        output = QueryOp("from", None)
        output.format = query.format
        output.frum = wrap_from(query["from"], schema=schema)
        if not schema and isinstance(output.frum, Schema):
            schema = output.frum
        if not schema and hasattr(output.frum, "schema"):
            schema = output.frum.schema

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select, query.frum, schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = Data(name="count", value=jx_expression("."), aggregate="count", default=0)
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = _normalize_edges(query.edges, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
예제 #36
0
def _normalize_window(window, schema=None):
    v = window.value
    try:
        expr = jx_expression(v)
    except Exception:
        expr = ScriptOp("script", v)

    return Dict(
        name=coalesce(window.name, window.value),
        value=expr,
        edges=[_normalize_edge(e, schema) for e in listwrap(window.edges)],
        sort=_normalize_sort(window.sort),
        aggregate=window.aggregate,
        range=_normalize_range(window.range),
        where=_normalize_where(window.where, schema=schema),
    )
예제 #37
0
def es_deepop(es, mvel, query):
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter":
        simplify_esfilter(jx_expression(query.where).to_esfilter())
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [
            e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)
        ]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
예제 #38
0
    def _edges_op(self, query):
        selects = []
        for s in listwrap(query.select):
            if s.value == "." and s.aggregate == "count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate] + "(" +
                               jx_expression(s.value).to_sql() + ") AS " +
                               quote_table(s.name))

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        agg_prefix = " FROM "
        agg_suffix = "\n"

        agg = ""
        ons = []
        groupby = ""
        groupby_prefix = "\nGROUP BY "

        for i, e in enumerate(query.edges):
            edge_alias = "e" + unicode(i)
            edge_value = e.value.to_sql()
            value = edge_value
            for v in e.value.vars():
                value = value.replace(quote_table(v), "a." + quote_table(v))

            edge_name = quote_table(e.name)
            selects.append(edge_alias + "." + edge_name + " AS " + edge_name)
            agg += \
                agg_prefix + "(" + \
                "SELECT DISTINCT " + edge_value + " AS " + edge_name + " FROM " + quote_table(self.name) + \
                ") " + edge_alias + \
                agg_suffix
            agg_prefix = " LEFT JOIN "
            agg_suffix = " ON 1=1\n"
            ons.append(edge_alias + "." + edge_name + " = " + value)
            groupby += groupby_prefix + edge_alias + "." + edge_name
            groupby_prefix = ",\n"

        agg += agg_prefix + quote_table(
            self.name) + " a ON " + " AND ".join(ons)

        where = "\nWHERE " + query.where.to_sql()

        return "SELECT " + (",\n".join(selects)) + agg + where + groupby
    def _edges_op(self, query):
        selects = []
        for s in listwrap(query.select):
            if s.value=="." and s.aggregate=="count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate]+"("+jx_expression(s.value).to_sql() + ") AS " + quote_table(s.name))

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        agg_prefix = " FROM "
        agg_suffix = "\n"

        agg = ""
        ons = []
        groupby = ""
        groupby_prefix = "\nGROUP BY "

        for i, e in enumerate(query.edges):
            edge_alias = "e" + unicode(i)
            edge_value = e.value.to_sql()
            value = edge_value
            for v in e.value.vars():
                value = value.replace(quote_table(v), "a."+quote_table(v))

            edge_name = quote_table(e.name)
            selects.append(edge_alias + "." + edge_name + " AS " + edge_name)
            agg += \
                agg_prefix + "(" + \
                "SELECT DISTINCT " + edge_value + " AS " + edge_name + " FROM " + quote_table(self.name) + \
                ") " + edge_alias + \
                agg_suffix
            agg_prefix = " LEFT JOIN "
            agg_suffix = " ON 1=1\n"
            ons.append(edge_alias + "." + edge_name + " = "+ value)
            groupby += groupby_prefix + edge_alias + "." + edge_name
            groupby_prefix = ",\n"

        agg += agg_prefix + quote_table(self.name) + " a ON "+" AND ".join(ons)

        where = "\nWHERE " + query.where.to_sql()

        return "SELECT " + (",\n".join(selects)) + agg + where+groupby
예제 #40
0
def es_deepop(es, mvel, query):
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = FlatList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
    def _groupby_op(self, query):
        selects = []
        for s in listwrap(query.select):
            if s.value=="." and s.aggregate=="count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate]+"("+jx_expression(s.value).to_sql() + ") AS " + quote_table(s.name))

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        agg = " FROM " + quote_table(self.name) + " a\n"
        groupby = ""
        groupby_prefix = " GROUP BY "

        for i, e in enumerate(query.edges):
            value = e.to_sql()
            groupby += groupby_prefix + value
            groupby_prefix = ",\n"

        where = "\nWHERE " + query.where.to_sql()

        return "SELECT " + (",\n".join(selects)) + agg + where+groupby
예제 #42
0
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        canonical = select = Data(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                   select.aggregate, "none")
    canonical.default = coalesce(
        select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(unwrap(frum), "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend([
            set_default({
                "name": c.name,
                "value": jx_expression(c.name)
            }, canonical) for c in frum.get_leaves()
        ])
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            base_name = select.value[:-2]
            canonical.name = coalesce(select.name, base_name, select.aggregate)
            value = jx_expression(select[:-2])
            if not isinstance(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append([
                    set_default(
                        {
                            "name": base_name,
                            "value": LeavesOp("leaves", value),
                            "format": "dict"  # MARKUP FOR DECODING
                        },
                        canonical) for c in frum.get_columns()
                    if c.type not in STRUCT
                ])
            else:
                output.extend([
                    set_default(
                        {
                            "name":
                            base_name + "." +
                            literal_field(c.name[len(base_name) + 1:]),
                            "value":
                            jx_expression(c.name)
                        }, canonical) for c in frum.get_leaves()
                    if c.name.startswith(base_name + ".")
                ])
        else:
            canonical.name = coalesce(select.name, select.value,
                                      select.aggregate)
            canonical.value = jx_expression(select.value)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
예제 #43
0
def DataClass(name, columns, constraint=True):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints
    :return: The class that has been created
    """

    columns = wrap([{"name": c, "required": True, "nulls": False, "type": object} if isinstance(c, basestring) else c for c in columns])
    slots = columns.name
    required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.type, object) for c in columns}

    code = expand_template(
"""
from __future__ import unicode_literals
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}


    def _constraint(row, rownum, rows):
        return {{constraint_expr}}

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})

temp = {{class_name}}
""",
        {
            "class_name": name,
            "slots": "(" + (", ".join(convert.value2quote(s) for s in slots)) + ")",
            "required": "{" + (", ".join(convert.value2quote(s) for s in required)) + "}",
            "nulls": "{" + (", ".join(convert.value2quote(s) for s in nulls)) + "}",
            "defaults": jx_expression({"literal": defaults}).to_python(),
            "len_slots": len(slots),
            "dict": "{" + (", ".join(convert.value2quote(s) + ": self." + s for s in slots)) + "}",
            "assign": "; ".join("_set(output, "+convert.value2quote(s)+", self."+s+")" for s in slots),
            "types": "{" + (",".join(convert.string2quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
            "constraint_expr": jx_expression(constraint).to_python(),
            "constraint": convert.value2json(constraint)
        }
    )

    return _exec(code, name)
예제 #44
0
def _normalize_edge(edge, dim_index, schema=None):
    """
    :param edge: Not normalized edge 
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return [Data(
                        name=edge,
                        value=jx_expression(edge),
                        allowNulls=True,
                        dim=dim_index,
                        domain=_normalize_domain(domain=e, schema=schema)
                    )]
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return [Data(
                        name=e.name,
                        value=jx_expression(e.fields[0]),
                        allowNulls=True,
                        dim=dim_index,
                        domain=e.getDomain()
                    )]
                else:
                    return [Data(
                        name=e.name,
                        allowNulls=True,
                        dim=dim_index,
                        domain=e.getDomain()
                    )]
        return [Data(
            name=edge,
            value=jx_expression(edge),
            allowNulls=True,
            dim=dim_index,
            domain=_normalize_domain(schema=schema)
        )]
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
예제 #45
0
            allowNulls=True,
            domain=_normalize_domain(schema=schema)
        )
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Dict(fields=edge.value)

            return Dict(
                name=edge.name,
                value=jx_expression(edge.value),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                domain=domain
            )

        domain = _normalize_domain(edge.domain, schema=schema)

        return Dict(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            domain=domain
        )

예제 #46
0
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: c.es_column for c in frum._columns}

    es_query = Dict()
    new_select = Dict()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error(
                            'Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate
                        )
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, basestring) or s.percetile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\.0",
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = jx_expression(s.value).map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\.0",
            }
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(aggs={"_filter": set_default({"filter": filter_}, es_query)})

        es_query = wrap({"aggs": {"_nested": set_default({"nested": {"path": frum.query_path}}, es_query)}})
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(aggs={"_filter": set_default({"filter": filter}, es_query)})
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(
                result.aggregations.doc_count, result.hits.total
            )  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
예제 #47
0
 def test_in(self):
     where = {"in": {"a": [1, 2]}}
     result = simplify_esfilter(jx_expression(where).to_esfilter())
     self.assertEqual(result, {"terms": {"a": [1, 2]}})
예제 #48
0
def _normalize_where(where, schema=None):
    if where == None:
        return TrueOp()
    return jx_expression(where)
예제 #49
0
 def test_in_map(self):
     where = {"in": {"a": [1, 2]}}
     result = jx_expression(where).map({"a": "c"}).__data__()
     self.assertEqual(result, {"in": {"c": [1, 2]}})
예제 #50
0
def _normalize_edge(edge, dim_index, schema=None):
    """
    :param edge: Not normalized edge 
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return [
                        Data(name=edge,
                             value=jx_expression(edge),
                             allowNulls=True,
                             dim=dim_index,
                             domain=_normalize_domain(domain=e, schema=schema))
                    ]
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return [
                        Data(name=e.name,
                             value=jx_expression(e.fields[0]),
                             allowNulls=True,
                             dim=dim_index,
                             domain=e.getDomain())
                    ]
                else:
                    return [
                        Data(name=e.name,
                             allowNulls=True,
                             dim=dim_index,
                             domain=e.getDomain())
                    ]
        return [
            Data(name=edge,
                 value=jx_expression(edge),
                 allowNulls=True,
                 dim=dim_index,
                 domain=_normalize_domain(schema=schema))
        ]
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound and complex edges: {{edge}}",
                      edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [
                Data(name=edge.name,
                     value=jx_expression(edge.value),
                     allowNulls=bool(coalesce(edge.allowNulls, True)),
                     dim=dim_index,
                     domain=domain)
            ]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [
            Data(name=coalesce(edge.name, edge.value),
                 value=jx_expression(edge.value),
                 range=_normalize_range(edge.range),
                 allowNulls=bool(coalesce(edge.allowNulls, True)),
                 dim=dim_index,
                 domain=domain)
        ]
예제 #51
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()
        self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

        if isinstance(self.key, set):
            Log.error("problem")

        if not desc.key and (len(desc.partitions) == 0
                             or isinstance(desc.partitions[0],
                                           (basestring, Number, tuple))):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.map = {}
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
            self.label = coalesce(self.label, "name")
            self.primitive = True
            return

        if desc.partitions and desc.dimension.fields and len(
                desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.key, (list, set)):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key],
                                            Mapping):
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif len(desc.partitions) == 0:
            # CREATE AN EMPTY DOMAIN
            self.key = "value"
            self.map = {}
            self.order[None] = 0
            self.label = coalesce(self.label, "name")
            return
        elif desc.key == None:
            if desc.partitions and all(desc.partitions.where) or all(
                    desc.partitions.esfilter):
                if not all(desc.partitions.name):
                    Log.error("Expecting all partitions to have a name")
                from pyLibrary.queries.expressions import jx_expression

                self.key = "name"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.partitions.append({
                        "where":
                        jx_expression(coalesce(p.where, p.esfilter)),
                        "name":
                        p.name,
                        "dataIndex":
                        i
                    })
                    self.map[p.name] = p
                    self.order[p.name] = i
                return
            elif desc.partitions and len(set(desc.partitions.value) -
                                         {None}) == len(desc.partitions):
                # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
                self.key = "value"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.map[p[self.key]] = p
                    self.order[p[self.key]] = i
                self.primitive = False
            else:
                Log.error("Domains must have keys, or partitions")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")

        if hasattr(desc.partitions, "__iter__"):
            self.partitions = wrap(list(desc.partitions))
        else:
            Log.error("expecting a list of partitions")
    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command["clear"]))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars
            for c in self.columns.get(v, Null)
            if c.type not in ["nested", "object"]
            }
        where_sql = where.map(_map).to_sql()
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(
                name=new_column_name,
                type=ctype,
                table=self.name,
                es_index=self.name,
                es_column=typed_column(new_column_name, ctype)
            )
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = join_field(split_field(self.name)+split_field(nested_column_name))
                nested_table = self.nested_tables[nested_table_name]
                self_primary_key = ",".join(quote_table(c.es_column) for u in self.uid for c in self.columns[u])
                extra_key_name = UID_PREFIX+"id"+unicode(len(self.uid))
                extra_key = [e for e in nested_table.columns[extra_key_name]][0]

                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.name) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d, Dict(), doc_collection, path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         _quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.name) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " +
                    quote_value(i) + " " +quote_table(extra_key.es_column) + "," +
                    ",".join(
                        quote_value(row[c.name]) + " " + quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns
                    )
                    for i, row in enumerate(doc_collection.get(".", Null).rows)
                )

                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + _quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(
                            name=c.name,
                            type=c.type,
                            table=self.name,
                            es_index=c.es_index,
                            es_column=c.es_column,
                            nested_path=[nested_column_name]+listwrap(c.nested_path)
                        )
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [c.type for c in self.columns[c.name]]:
                            self.columns[column.name].add(column)

        command = "UPDATE " + quote_table(self.name) + " SET " + \
                  ",\n".join(
                      [
                          _quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                          for k, v in command.set.items()
                          if get_type(v) != "nested"
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ] +
                      [
                          _quote_column(c) + "=NULL"
                          for k in listwrap(command["clear"])
                          if k in self.columns
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ]
                  ) + \
                  " WHERE " + where_sql

        self.db.execute(command)
예제 #53
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = DictList()
        self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

        if isinstance(self.key, set):
            Log.error("problem")

        if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (basestring, Number, tuple))):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.map = {}
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
            self.label = coalesce(self.label, "name")
            self.primitive = True
            return

        if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.key, (list, set)):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif len(desc.partitions) == 0:
            # CREATE AN EMPTY DOMAIN
            self.key = "value"
            self.map = {}
            self.order[None] = 0
            self.label = coalesce(self.label, "name")
            return
        elif desc.key == None:
            if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
                if not all(desc.partitions.name):
                    Log.error("Expecting all partitions to have a name")
                from pyLibrary.queries.expressions import jx_expression

                self.key = "name"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.partitions.append({
                        "where": jx_expression(coalesce(p.where, p.esfilter)),
                        "name": p.name,
                        "dataIndex": i
                    })
                    self.map[p.name] = p
                    self.order[p.name] = i
                return
            elif desc.partitions and len(set(desc.partitions.value)-{None}) == len(desc.partitions):
                # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
                self.key = "value"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.map[p[self.key]] = p
                    self.order[p[self.key]] = i
                self.primitive = False
            else:
                Log.error("Domains must have keys, or partitions")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")

        if hasattr(desc.partitions, "__iter__"):
            self.partitions = wrap(list(desc.partitions))
        else:
            Log.error("expecting a list of partitions")
예제 #54
0
def _normalize_where(where, schema=None):
    if where == None:
        return TrueOp()
    return jx_expression(where)
예제 #55
0
def es_setop(es, mvel, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(
        query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex:
        if len(select) == 1 and not select[0].value or select[0].value == "*":
            FromES = wrap({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter":
                        simplify_esfilter(
                            jx_expression(query.where).to_esfilter())
                    }
                },
                "sort": query.sort,
                "size": 1
            })
        elif all(isinstance(v, Variable) for v in select.value):
            FromES = wrap({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": simplify_esfilter(query.where.to_esfilter())
                    }
                },
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and not select[0].value or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = zip(*data_list)
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    return Dict(meta={"esquery": FromES}, data=cube)
예제 #56
0
def es_setop(es, mvel, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])   # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex:
        if len(select) == 1 and not select[0].value or select[0].value == "*":
            FromES = wrap({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
                }},
                "sort": query.sort,
                "size": 1
            })
        elif all(isinstance(v, Variable) for v in select.value):
            FromES = wrap({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": simplify_esfilter(query.where.to_esfilter())
                }},
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and  not select[0].value or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = zip(*data_list)
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    return Data(
        meta={"esquery": FromES},
        data=cube
    )
예제 #57
0
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        canonical = select = Dict(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    canonical.default = coalesce(select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(frum, "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend(
            [set_default({"name": c.name, "value": jx_expression(c.name)}, canonical) for c in frum.get_leaves()]
        )
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            base_name = select.value[:-2]
            canonical.name = coalesce(select.name, base_name, select.aggregate)
            value = jx_expression(select[:-2])
            if not isinstance(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append(
                    [
                        set_default(
                            {
                                "name": base_name,
                                "value": LeavesOp("leaves", value),
                                "format": "dict",  # MARKUP FOR DECODING
                            },
                            canonical,
                        )
                        for c in frum.get_columns()
                        if c.type not in ["object", "nested"]
                    ]
                )
            else:
                output.extend(
                    [
                        set_default(
                            {
                                "name": base_name + "." + literal_field(c.name[len(base_name) + 1 :]),
                                "value": jx_expression(c.name),
                            },
                            canonical,
                        )
                        for c in frum.get_leaves()
                        if c.name.startswith(base_name + ".")
                    ]
                )
        else:
            canonical.name = coalesce(select.name, select.value, select.aggregate)
            canonical.value = jx_expression(select.value)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output