コード例 #1
0
ファイル: query.py プロジェクト: klahnakoski/MoDevETL
def _map_term_using_schema(master, path, term, schema_edges):
    """
    IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM
    """
    output = DictList()
    for k, v in term.items():
        dimension = schema_edges[k]
        if isinstance(dimension, Dimension):
            domain = dimension.getDomain()
            if dimension.fields:
                if isinstance(dimension.fields, Mapping):
                    # EXPECTING A TUPLE
                    for local_field, es_field in dimension.fields.items():
                        local_value = v[local_field]
                        if local_value == None:
                            output.append({"missing": {"field": es_field}})
                        else:
                            output.append({"term": {es_field: local_value}})
                    continue

                if len(dimension.fields) == 1 and is_keyword(dimension.fields[0]):
                    # SIMPLE SINGLE-VALUED FIELD
                    if domain.getPartByKey(v) is domain.NULL:
                        output.append({"missing": {"field": dimension.fields[0]}})
                    else:
                        output.append({"term": {dimension.fields[0]: v}})
                    continue

                if AND(is_keyword(f) for f in dimension.fields):
                    # EXPECTING A TUPLE
                    if not isinstance(v, tuple):
                        Log.error("expecing {{name}}={{value}} to be a tuple",  name= k,  value= v)
                    for i, f in enumerate(dimension.fields):
                        vv = v[i]
                        if vv == None:
                            output.append({"missing": {"field": f}})
                        else:
                            output.append({"term": {f: vv}})
                    continue
            if len(dimension.fields) == 1 and is_keyword(dimension.fields[0]):
                if domain.getPartByKey(v) is domain.NULL:
                    output.append({"missing": {"field": dimension.fields[0]}})
                else:
                    output.append({"term": {dimension.fields[0]: v}})
                continue
            if domain.partitions:
                part = domain.getPartByKey(v)
                if part is domain.NULL or not part.esfilter:
                    Log.error("not expected to get NULL")
                output.append(part.esfilter)
                continue
            else:
                Log.error("not expected")
        elif isinstance(v, Mapping):
            sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
            output.append(sub)
            continue

        output.append({"term": {k: v}})
    return {"and": output}
コード例 #2
0
ファイル: aggs.py プロジェクト: klahnakoski/intermittents
def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if is_keyword(edge.value):
        calc = {"field": edge.value}
    else:
        calc = {"script": qb_expression_to_ruby(edge.value)}

    if is_keyword(edge.value):
        missing_range = {"or": [
            {"range": {edge.value: {"lt": to_float(_min)}}},
            {"range": {edge.value: {"gte": to_float(_max)}}}
        ]}
    else:
        missing_range = {"script": {"script": qb_expression_to_ruby({"or": [
            {"lt": [edge.value, to_float(_min)]},
            {"gt": [edge.value, to_float(_max)]},
        ]})}}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": set_default(
            {"filter": {"or": [
                missing_range,
                {"missing": {"field": get_all_vars(edge.value)}}
            ]}},
            es_query
        ),
    }})
コード例 #3
0
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:

        if is_keyword(s.value):
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter":{"exists":{"field":s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #4
0
ファイル: setop.py プロジェクト: klahnakoski/Activedata-ETL
def es_setop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression_to_ruby(s.value)}

    return extract_rows(es, es_query, source, select, query)
コード例 #5
0
ファイル: rename.py プロジェクト: davehunt/ActiveData
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value
     """
     if expr is True or expr == None or expr is False:
         return expr
     elif Math.is_number(expr):
         return expr
     elif expr == ".":
         return "."
     elif is_keyword(expr):
         return coalesce(self.dimensions[expr], expr)
     elif isinstance(expr, basestring):
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
     elif isinstance(expr, Date):
         return expr
     elif isinstance(expr, QueryOp):
         return self._convert_query(expr)
     elif isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         elif len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({
                 name: self.convert(value)
                 for name, value in expr.leaves()
             })
         else:
             # ASSUME SINGLE-CLAUSE EXPRESSION
             k, v = expr.items()[0]
             return converter_map.get(k, self._convert_bop)(self, k, v)
     elif isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     else:
         return expr
コード例 #6
0
def is_fieldop(es, query):
    if not (es.cluster.version.startswith("1.4.")
            or es.cluster.version.startswith("1.5.")):
        return False

    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
    select = listwrap(query.select)
    if not query.edges:
        isDeep = len(split_field(
            query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isSimple = AND(
            s.value != None and (s.value in ["*", "."] or is_keyword(s.value))
            for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)

        if not isDeep and isSimple and noAgg:
            return True
    else:
        isSmooth = AND((
            e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none")
                       for e in query.edges)
        if isSmooth:
            return True

    return False
コード例 #7
0
ファイル: setop.py プロジェクト: klahnakoski/Activedata-ETL
def es_fieldop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)
    es_query.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": simplify_esfilter(qb_expression_to_esfilter(query.where))
        }
    }
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()
    source = "fields"
    for s in select.value:
        if s == "*":
            es_query.fields=None
            source = "_source"
        elif s == ".":
            es_query.fields=None
            source = "_source"
        elif isinstance(s, basestring) and is_keyword(s):
            es_query.fields.append(s)
        elif isinstance(s, list) and es_query.fields is not None:
            es_query.fields.extend(s)
        elif isinstance(s, Mapping) and es_query.fields is not None:
            es_query.fields.extend(s.values())
        elif es_query.fields is not None:
            es_query.fields.append(s)
    es_query.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    return extract_rows(es, es_query, source, select, query)
コード例 #8
0
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:

        if is_keyword(s.value):
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter":{"exists":{"field":s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #9
0
ファイル: typed.py プロジェクト: klahnakoski/MoDevETL
 def convert(self, expr):
     """
     ADD THE ".$value" SUFFIX TO ALL VARIABLES
     """
     if expr is True or expr == None or expr is False:
         return expr
     elif Math.is_number(expr):
         return expr
     elif expr == ".":
         return "."
     elif is_keyword(expr):
         #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
         return expr + ".$value"
     elif isinstance(expr, basestring):
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
     elif isinstance(expr, Date):
         return expr
     elif isinstance(expr, Query):
         return self._convert_query(expr)
     elif isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         elif len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({name: self.convert(value) for name, value in expr.items()})
         else:
             # ASSUME SINGLE-CLAUSE EXPRESSION
             k, v = expr.items()[0]
             return self.converter_map.get(k, self._convert_bop)(k, v)
     elif isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
コード例 #10
0
ファイル: rename.py プロジェクト: klahnakoski/MoDevETL
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value
     """
     if expr is True or expr == None or expr is False:
         return expr
     elif Math.is_number(expr):
         return expr
     elif expr == ".":
         return "."
     elif is_keyword(expr):
         return coalesce(self.dimensions[expr], expr)
     elif isinstance(expr, basestring):
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
     elif isinstance(expr, Date):
         return expr
     elif isinstance(expr, Query):
         return self._convert_query(expr)
     elif isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         elif len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({name: self.convert(value) for name, value in expr.leaves()})
         else:
             # ASSUME SINGLE-CLAUSE EXPRESSION
             k, v = expr.items()[0]
             return converter_map.get(k, self._convert_bop)(self, k, v)
     elif isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     else:
         return expr
コード例 #11
0
def es_fieldop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)
    es_query.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": simplify_esfilter(qb_expression_to_esfilter(query.where))
        }
    }
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()
    source = "fields"
    for s in select.value:
        if s == "*":
            es_query.fields = None
            source = "_source"
        elif s == ".":
            es_query.fields = None
            source = "_source"
        elif isinstance(s, basestring) and is_keyword(s):
            es_query.fields.append(s)
        elif isinstance(s, list) and es_query.fields is not None:
            es_query.fields.extend(s)
        elif isinstance(s, Mapping) and es_query.fields is not None:
            es_query.fields.extend(s.values())
        elif es_query.fields is not None:
            es_query.fields.append(s)
    es_query.sort = [{
        s.field: "asc" if s.sort >= 0 else "desc"
    } for s in query.sort]

    return extract_rows(es, es_query, source, select, query)
コード例 #12
0
def es_setop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            es_query.script_fields[literal_field(s.name)] = {
                "script": qb_expression_to_ruby(s.value)
            }

    return extract_rows(es, es_query, source, select, query)
コード例 #13
0
def get_all_vars(expr):
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(
                        a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation", op=op)
コード例 #14
0
def get_all_vars(expr):
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation",  op= op)
コード例 #15
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search(
            {
                "fields": listwrap(schema._routing.path),
                "query": {
                    "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()}
                },
                "size": 200000,
            }
        )

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append(
                        {
                            "update": {
                                "_id": h._id,
                                "_routing": unwraplist(h.fields[literal_field(schema._routing.path)]),
                            }
                        }
                    )
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8")
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"consistency": self.settings.consistency},
            )
            if response.errors:
                Log.error(
                    "could not update: {{error}}",
                    error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)],
                )
コード例 #16
0
ファイル: query.py プロジェクト: klahnakoski/intermittents
def _get_nested_path(field, schema):
    if not INDEX_CACHE:
        _late_import()

    if is_keyword(field):
        field = join_field([schema.es.alias] + split_field(field))
        for i, f in reverse(enumerate(split_field(field))):
            path = join_field(split_field(field)[0:i + 1:])
            if path in INDEX_CACHE:
                return join_field(split_field(path)[1::])
    return None
コード例 #17
0
ファイル: query.py プロジェクト: klahnakoski/MoTreeherder
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if isinstance(where, Mapping):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term,
                                                schema.edges)
                return output
            except Exception, e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = DictList()
            for k, v in where.terms.items():
                if not isinstance(v, (list, set)):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    output.append({"terms": {k: v}})
                else:
                    if isinstance(edge, basestring):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception, e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if isinstance(fields, Mapping):
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif isinstance(fields,
                                    list) and len(fields) == 1 and is_keyword(
                                        fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({
                            "or":
                            [domain.getPartByKey(vv).esfilter for vv in v]
                        })
            return {"and": output}
コード例 #18
0
ファイル: expressions.py プロジェクト: mozilla/ChangeDetector
def qb_expression(expr):
    """
    WRAP A QB EXPRESSION WITH OBJECT REPRESENTATION
    """
    if expr in (True, False, None) or expr == None or isinstance(expr, (float, int, Decimal)) or isinstance(expr, Date):
        return Literal(None, expr)
    elif is_keyword(expr):
        return Variable(expr)

    try:
        items = expr.items()
    except Exception, e:
        Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e)
コード例 #19
0
ファイル: lists.py プロジェクト: klahnakoski/MoDevETL
    def select(self, select):
        selects = listwrap(select)
        if selects[0].value == "." and selects[0].name == ".":
            return self

        for s in selects:
            if not isinstance(s.value, basestring) or not is_keyword(s.value):
                Log.error("selecting on structure, or expressions, not supported yet")

        #TODO: DO THIS WITH JUST A SCHEMA TRANSFORM, DO NOT TOUCH DATA
        #TODO: HANDLE STRUCTURE AND EXPRESSIONS
        new_schema = {s.name: self.schema[s.value] for s in selects}
        new_data = [{s.name: d[s.value] for s in selects} for d in self.data]
        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
コード例 #20
0
ファイル: query.py プロジェクト: klahnakoski/MoDevETL
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if isinstance(where, Mapping):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception, e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = DictList()
            for k, v in where.terms.items():
                if not isinstance(v, (list, set)):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    output.append({"terms": {k: v}})
                else:
                    if isinstance(edge, basestring):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception, e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if isinstance(fields, Mapping):
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif isinstance(fields, list) and len(fields) == 1 and is_keyword(fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
コード例 #21
0
def is_fieldop(query):
    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)

    select = listwrap(query.select)
    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isSimple = AND(s.value != None and (s.value == "*" or is_keyword(s.value)) for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)

        if not isDeep and isSimple and noAgg:
            return True
    else:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        if isSmooth:
            return True

    return False
コード例 #22
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": listwrap(schema._routing.path),
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": jx_expression(command.where).to_esfilter()
            }},
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"consistency": self.settings.consistency}
            )
            if response.errors:
                Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
コード例 #23
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": [],
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": _normalize_where(command.where, self)
                }
            },
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = DictList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")

            scripts.append("ctx._source." + k + " = " +
                           expressions.qb_expression_to_ruby(v) + ";\n")
        script = "".join(scripts)

        if results.hits.hits:
            command = []
            for id in results.hits.hits._id:
                command.append({"update": {"_id": id}})
                command.append({"script": script})
            content = ("\n".join(convert.value2json(c)
                                 for c in command) + "\n").encode('utf-8')
            self._es.cluster._post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"})
コード例 #24
0
    def convert(self, expr):
        """
        ADD THE ".$value" SUFFIX TO ALL VARIABLES
        """
        if isinstance(expr, Expression):
            vars_ = expr.vars()
            rename = {
                v: join_field(split_field(v) + ["$value"])
                for v in vars_
            }
            return expr.map(rename)

        if expr is True or expr == None or expr is False:
            return expr
        elif Math.is_number(expr):
            return expr
        elif expr == ".":
            return "."
        elif is_keyword(expr):
            #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
            return expr + ".$value"
        elif isinstance(expr, basestring):
            Log.error("{{name|quote}} is not a valid variable name", name=expr)
        elif isinstance(expr, Date):
            return expr
        elif isinstance(expr, QueryOp):
            return self._convert_query(expr)
        elif isinstance(expr, Mapping):
            if expr["from"]:
                return self._convert_query(expr)
            elif len(expr) >= 2:
                #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
                return wrap({
                    name: self.convert(value)
                    for name, value in expr.items()
                })
            else:
                # ASSUME SINGLE-CLAUSE EXPRESSION
                k, v = expr.items()[0]
                return self.converter_map.get(k, self._convert_bop)(k, v)
        elif isinstance(expr, (list, set, tuple)):
            return wrap([self.convert(value) for value in expr])
コード例 #25
0
ファイル: aggop.py プロジェクト: mozilla/ActiveData-ETL
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script":
                        es09.expressions.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {
        s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[
            aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
コード例 #26
0
ファイル: aggop.py プロジェクト: klahnakoski/MoDevETL
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD


    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": es09.expressions.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
コード例 #27
0
def qb_expression_to_ruby(expr):
    if expr == None:
        return "nil"
    elif Math.is_number(expr):
        return unicode(expr)
    elif is_keyword(expr):
        return "doc[" + convert.string2quote(expr) + "].value"
    elif isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, CODE):
        return expr.code
    elif isinstance(expr, Date):
        return unicode(expr.unix)
    elif expr is True:
        return "true"
    elif expr is False:
        return "false"

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            if not term:
                return mop[1]  # RETURN DEFAULT
            else:
                output = mop[0].join(["(" + qb_expression_to_ruby(t) + ")" for t in term])
                return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            output = "(" + qb_expression_to_ruby(a) + ")" + mop[0] + "(" + qb_expression_to_ruby(b) + ")"
            return output
        else:
            qb_expression_to_ruby(term)


    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = bop.join(["(" + qb_expression_to_ruby(t) + ")" for t in term])
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                # eq CAN ACCEPT A WHOLE OBJECT OF key:value PAIRS TO COMPARE
                output = " and ".join("(" + qb_expression_to_ruby(a) + ")" + bop + "(" + qb_expression_to_ruby(b) + ")" for a, b in term.items())
                return output
            else:
                a, b = term.items()[0]
                output = "(" + qb_expression_to_ruby(a) + ")" + bop + "(" + qb_expression_to_ruby(b) + ")"
                return output
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        output = expand_template(uop, {"term": qb_expression_to_ruby(term)})
        return output

    cop = complex_operators.get(op)
    if cop:
        output = cop(term).to_ruby()
        return output

    Log.error("`{{op}}` is not a recognized operation",  op= op)
コード例 #28
0
def qb_expression_to_python(expr):
    if expr == None:
        return "None"
    elif Math.is_number(expr):
        return unicode(expr)
    elif isinstance(expr, Date):
        return unicode(expr.unix)
    elif isinstance(expr, unicode):
        if expr == ".":
            return "row"
        elif is_keyword(expr):
            return "row[" + convert.value2quote(expr) + "]"
        else:
            Log.error("Expecting a json path")
    elif isinstance(expr, CODE):
        return expr.code
    elif expr is True:
        return "True"
    elif expr is False:
        return "False"

    op, term = expr.items()[0]

    mop = python_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            if not term:
                return mop[1]  # RETURN DEFAULT
            else:
                output = mop[0].join(["(" + qb_expression_to_python(t) + ")" for t in term])
                return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            output = "(" + qb_expression_to_python(a) + ")" + mop[0] + "(" + qb_expression_to_python(b) + ")"
            return output
        else:
            qb_expression_to_python(term)

    bop = python_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = bop.join(["(" + qb_expression_to_python(t) + ")" for t in term])
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                # eq CAN ACCEPT A WHOLE OBJECT OF key:value PAIRS TO COMPARE
                output = " and ".join("(" + qb_expression_to_python(a) + ")" + bop + "(" + qb_expression_to_python(b) + ")" for a, b in term.items())
                return output
            else:
                a, b = term.items()[0]
                output = "(" + qb_expression_to_python(a) + ")" + bop + "(" + qb_expression_to_python(b) + ")"
                return output
        else:
            Log.error("Expecting binary term")

    uop = python_unary_operators.get(op)
    if uop:
        output = uop + "(" + qb_expression_to_python(term) + ")"
        return output

    Log.error("`{{op}}` is not a recognized operation",  op= op)
コード例 #29
0
def qb_expression_to_ruby(expr):
    if expr == None:
        return "nil"
    elif Math.is_number(expr):
        return unicode(expr)
    elif is_keyword(expr):
        return "doc[" + convert.string2quote(expr) + "].value"
    elif isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, CODE):
        return expr.code
    elif isinstance(expr, Date):
        return unicode(expr.unix)
    elif expr is True:
        return "true"
    elif expr is False:
        return "false"

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            if not term:
                return mop[1]  # RETURN DEFAULT
            else:
                output = mop[0].join(
                    ["(" + qb_expression_to_ruby(t) + ")" for t in term])
                return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            output = "(" + qb_expression_to_ruby(
                a) + ")" + mop[0] + "(" + qb_expression_to_ruby(b) + ")"
            return output
        else:
            qb_expression_to_ruby(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = bop.join(
                ["(" + qb_expression_to_ruby(t) + ")" for t in term])
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                # eq CAN ACCEPT A WHOLE OBJECT OF key:value PAIRS TO COMPARE
                output = " and ".join("(" + qb_expression_to_ruby(a) + ")" +
                                      bop + "(" + qb_expression_to_ruby(b) +
                                      ")" for a, b in term.items())
                return output
            else:
                a, b = term.items()[0]
                output = "(" + qb_expression_to_ruby(
                    a) + ")" + bop + "(" + qb_expression_to_ruby(b) + ")"
                return output
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        output = expand_template(uop, {"term": qb_expression_to_ruby(term)})
        return output

    cop = complex_operators.get(op)
    if cop:
        output = cop(term).to_ruby()
        return output

    Log.error("`{{op}}` is not a recognized operation", op=op)
コード例 #30
0
def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if is_keyword(edge.value):
        calc = {"field": edge.value}
    else:
        calc = {"script": qb_expression_to_ruby(edge.value)}

    if edge.allowNulls:
        if is_keyword(edge.value):
            missing_range = {
                "or": [{
                    "range": {
                        edge.value: {
                            "lt": to_float(_min)
                        }
                    }
                }, {
                    "range": {
                        edge.value: {
                            "gte": to_float(_max)
                        }
                    }
                }]
            }
        else:
            missing_range = {
                "script": {
                    "script":
                    qb_expression_to_ruby({
                        "or": [
                            {
                                "lt": [edge.value, to_float(_min)]
                            },
                            {
                                "gt": [edge.value, to_float(_max)]
                            },
                        ]
                    })
                }
            }
        missing_filter = set_default(
            {
                "filter": {
                    "or": [
                        missing_range, {
                            "missing": {
                                "field": get_all_vars(edge.value)
                            }
                        }
                    ]
                }
            }, es_query)
    else:
        missing_filter = None

    return wrap({
        "aggs": {
            "_match":
            set_default({"range": calc}, {
                "range": {
                    "ranges": [{
                        "from": to_float(p.min),
                        "to": to_float(p.max)
                    } for p in domain.partitions]
                }
            }, es_query),
            "_missing":
            missing_filter
        }
    })
コード例 #31
0
ファイル: deep.py プロジェクト: klahnakoski/MoDevETL
def es_deepop(es, query):
    columns = query.frum.get_columns()
    query_path = query.frum.query_path
    columns = UniqueIndex(keys=["name"], data=sorted(columns, lambda a, b: cmp(len(listwrap(b.nested_path)), len(listwrap(a.nested_path)))), fail_on_dup=False)
    map_ = {c.name: c.abs_name for c in columns}
    map_to_local = {
        c.name: "_inner" + c.abs_name[len(listwrap(c.nested_path)[0]):] if c.nested_path else "fields." + literal_field(c.abs_name)
        for c in columns
    }
    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es14.util.es_query_template(query.frum.name)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(qb_expression(query.where), query.frum, map_)
    for i, f in enumerate(es_filters):
        # PROBLEM IS {"match_all": {}} DOES NOT SURVIVE set_default()
        for k, v in unwrap(simplify_esfilter(AndOp("and", wheres[i]).to_esfilter())).items():
            f[k] = v


    if not wheres[1]:
        more_filter = {
            "and": [
                simplify_esfilter(AndOp("and", wheres[0]).to_esfilter()),
                {"not": {
                    "nested": {
                        "path": query_path,
                        "filter": {
                            "match_all": {}
                        }
                    }
                }}
            ]
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = DictList()

    def get_pull(column):
        if column.nested_path:
            return "_inner" + column.abs_name[len(listwrap(column.nested_path)[0]):]
        else:
            return "fields." + literal_field(column.abs_name)

    i = 0
    for s in listwrap(query.select):
        if s.value == "*":
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            for c in columns:
                if c.relative and c.type not in ["nested", "object"]:
                    if not c.nested_path:
                        es_query.fields += [c.abs_name]
                    new_select.append({
                        "name": c.name,
                        "pull": get_pull(c),
                        "nested_path": listwrap(c.nested_path)[0],
                        "put": {"name": c.name, "index": i, "child": "."}
                    })
                    i += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            col_names = [c.name for c in columns if c.relative]
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.name = n.put.name = n.name.lstrip(".")
        elif s.value == ".":
            for c in columns:
                if c.relative and c.type not in ["nested", "object"]:
                    if not c.nested_path:
                        es_query.fields += [c.abs_name]
                    new_select.append({
                        "name": c.name,
                        "pull": get_pull(c),
                        "nested_path": listwrap(c.nested_path)[0],
                        "put": {"name": ".", "index": i, "child": c.abs_name}
                    })
            i += 1
        elif s.value == "_id":
            new_select.append({
                "name": s.name,
                "value": s.value,
                "pull": "_id",
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif isinstance(s.value, basestring) and s.value.endswith(".*") and is_keyword(s.value[:-2]):
            parent = s.value[:-1]
            prefix = len(parent)
            for c in columns:
                if c.name.startswith(parent) and c.type not in ["object", "nested"]:
                    pull = get_pull(c)
                    if len(listwrap(c.nested_path)) == 0:
                        es_query.fields += [c.abs_name]

                    new_select.append({
                        "name": s.name + "." + c.name[prefix:],
                        "pull": pull,
                        "nested_path": listwrap(c.nested_path)[0],
                        "put": {"name": s.name + "." + literal_field(c.name[prefix:]), "index": i, "child": "."}
                    })
                    i += 1
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            parent = s.value + "."
            prefix = len(parent)
            net_columns = [c for c in columns if c.name.startswith(parent) and c.type not in ["object", "nested"]]
            if not net_columns:
                c = columns[(s.value,)]
                pull = get_pull(c)
                if not c.nested_path:
                    es_query.fields += [s.value]
                new_select.append({
                    "name": s.name,
                    "pull": pull,
                    "nested_path": listwrap(c.nested_path)[0],
                    "put": {"name": s.name, "index": i, "child": "."}
                })
            else:
                for n in net_columns:
                    pull = get_pull(n)
                    if not n.nested_path:
                        es_query.fields += [n.abs_name]
                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": listwrap(n.nested_path)[0],
                        "put": {"name": s.name, "index": i, "child": n.name[prefix:]}
                    })
            i += 1
        else:
            expr = qb_expression(s.value)
            for v in expr.vars():
                for n in columns:
                    if n.name==v:
                        if not n.nested_path:
                            es_query.fields += [n.abs_name]

            pull = EXPRESSION_PREFIX + s.name
            post_expressions[pull] = compile_expression(expr.map(map_to_local).to_python())


            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.to_dict(),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es09.util.post(
            es,
            Dict(
                filter=more_filter,
                fields=es_query.fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.es_response_time = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)
コード例 #32
0
def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            constants = FlatList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #33
0
ファイル: aggs.py プロジェクト: mozilla/ChangeDetector
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])

    es_query = Dict()
    new_select = Dict()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif s.value == ".":
            if frum.typed:
                # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                if s.aggregate in NON_STATISTICAL_AGGS:
                    # TODO: HANDLE BOTH $value AND $objects TO COUNT
                    Log.error("do not know how to handle")
                else:
                    s.value = "$value"
                    new_select["$value"] += [s]
            else:
                if s.aggregate in NON_STATISTICAL_AGGS:
                    # TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                    Log.error("do not know how to handle")
                else:
                    Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate)
        elif is_keyword(s.value) and s.aggregate == "count":
            s.value = coalesce(frum[s.value].abs_name, s.value)
            new_select["count_" + literal_field(s.value)] += [s]
        elif is_keyword(s.value):
            s.value = coalesce(frum[s.value].abs_name, s.value)
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
                key = literal_field(canonical_name + " percentile")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = qb_expression(s.value).map({c.name: c.abs_name for c in frum._columns})

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = qb_expression(query.where).vars()
    map_ = {v: frum[v].abs_name for v in vars_}

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(qb_expression(query.where), schema=frum, map_=map_)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2:]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(aggs={"_filter": set_default({"filter": filter_}, es_query)})

        es_query = wrap({"aggs": {"_nested": set_default({"nested": {"path": frum.query_path}}, es_query)}})
    else:
        if any(split_where[1:]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(aggs={"_filter": set_default({"filter": filter}, es_query)})
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        decoders = [d for ds in decoders for d in ds]
        result.aggregations.doc_count = coalesce(
            result.aggregations.doc_count, result.hits.total
        )  # IT APPEARS THE OLD doc_count IS GONE

        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query, select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start, query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.es_response_time = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
コード例 #34
0
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT
    """
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_keyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_keyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max, {"gt": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_keyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max, {"gte": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_keyword(edge.value):
        # USE FAST ES SYNTAX
        if edge.domain.type in domains.ALGEBRAIC:
            output.range = {}
            output.range[edge.value] = {"gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max)}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                    # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output.term = {edge.value: partition.value}
            else:
                output.term = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output.term = dict()
            output.term[edge.value] = partition.value
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output.script = {"script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)}
        else:
            output.script = {"script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)}

        code = es09.expressions.addFunctions(output.script.script)
        output.script.script = code.head + code.body
        return output
コード例 #35
0
def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            constants = DictList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = DictList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #36
0
 def test_good_var(self):
     self.assertTrue(
         is_keyword(u'_a._b'),
         "That's a good variable name!"
     )
コード例 #37
0
 def test_error_on_bad_var(self):
     self.assertFalse(
         is_keyword(u'coalesce(rows[rownum+1].timestamp, Date.eod())'),
         "That's not a valid variable name!!"
     )
コード例 #38
0
ファイル: expressions.py プロジェクト: mozilla/ChangeDetector
 def __init__(self, var):
     Expression.__init__(self, "", None)
     if not is_keyword(var):
         Log.error("Expecting a variable")
     self.var = var
コード例 #39
0
ファイル: setop.py プロジェクト: klahnakoski/MoDevETL
def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    column_names = set(c.name for c in query.frum.get_columns() if c.type not in ["object"] and (not c.nested_path or c.abs_name == c.nested_path or not c.nested_path))
    source = "fields"

    i = 0
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if s.value == "*":
            es_query.fields = None
            source = "_source"

            net_columns = column_names - set(select.name)
            for n in net_columns:
                new_select.append({
                    "name": n,
                    "value": n,
                    "put": {"name": n, "index": i, "child": "."}
                })
                i += 1
        elif s.value == ".":
            es_query.fields = None
            source = "_source"

            new_select.append({
                "name": s.name,
                "value": s.value,
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif s.value == "_id":
            new_select.append({
                "name": s.name,
                "value": s.value,
                "pull": "_id",
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif isinstance(s.value, basestring) and s.value.endswith(".*") and is_keyword(s.value[:-2]):
            parent = s.value[:-1]
            prefix = len(parent)
            for c in column_names:
                if c.startswith(parent):
                    if es_query.fields is not None:
                        es_query.fields.append(c)

                    new_select.append({
                        "name": s.name + "." + c[prefix:],
                        "value": c,
                        "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                    })
                    i += 1
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            parent = s.value + "."
            prefix = len(parent)
            net_columns = [c for c in column_names if c.startswith(parent)]
            if not net_columns:
                if es_query.fields is not None:
                    es_query.fields.append(s.value)
                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
            else:
                for n in net_columns:
                    if es_query.fields is not None:
                        es_query.fields.append(n)
                    new_select.append({
                        "name": s.name,
                        "value": n,
                        "put": {"name": s.name, "index": i, "child": n[prefix:]}
                    })
            i += 1
        elif isinstance(s.value, list):
            Log.error("need an example")
            if es_query.fields is not None:
                es_query.fields.extend([v for v in s.value])
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression(s.value).to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value))
        else:
            n.pull = "fields." + literal_field(n.value)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.es_response_time = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)
コード例 #40
0
ファイル: aggs.py プロジェクト: klahnakoski/intermittents
def es_aggsop(es, frum, query):
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for litral_field, many in new_select.items():
        if len(many)>1:
            canonical_name=literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {"_nested": set_default({
                "nested": {
                    "path": join_field(split_field(frum.name)[1::])
                }
            }, es_query)}
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query, select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start, query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.es_response_time = es_duration.seconds
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",  format= query.format, cause=e)
        Log.error("Some problem", e)
コード例 #41
0
def qb_expression_to_python(expr):
    if expr == None:
        return "None"
    elif Math.is_number(expr):
        return unicode(expr)
    elif isinstance(expr, Date):
        return unicode(expr.unix)
    elif isinstance(expr, unicode):
        if expr == ".":
            return "row"
        elif is_keyword(expr):
            return "row[" + convert.value2quote(expr) + "]"
        else:
            Log.error("Expecting a json path")
    elif isinstance(expr, CODE):
        return expr.code
    elif expr is True:
        return "True"
    elif expr is False:
        return "False"

    op, term = expr.items()[0]

    mop = python_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            if not term:
                return mop[1]  # RETURN DEFAULT
            else:
                output = mop[0].join(
                    ["(" + qb_expression_to_python(t) + ")" for t in term])
                return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            output = "(" + qb_expression_to_python(
                a) + ")" + mop[0] + "(" + qb_expression_to_python(b) + ")"
            return output
        else:
            qb_expression_to_python(term)

    bop = python_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = bop.join(
                ["(" + qb_expression_to_python(t) + ")" for t in term])
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                # eq CAN ACCEPT A WHOLE OBJECT OF key:value PAIRS TO COMPARE
                output = " and ".join("(" + qb_expression_to_python(a) + ")" +
                                      bop + "(" + qb_expression_to_python(b) +
                                      ")" for a, b in term.items())
                return output
            else:
                a, b = term.items()[0]
                output = "(" + qb_expression_to_python(
                    a) + ")" + bop + "(" + qb_expression_to_python(b) + ")"
                return output
        else:
            Log.error("Expecting binary term")

    uop = python_unary_operators.get(op)
    if uop:
        output = uop + "(" + qb_expression_to_python(term) + ")"
        return output

    Log.error("`{{op}}` is not a recognized operation", op=op)
コード例 #42
0
def es_aggsop(es, frum, query):
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for litral_field, many in new_select.items():
        if len(many) > 1:
            canonical_name = literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(
                s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[
            s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [
        AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])
    ]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)})

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {
                "_nested":
                set_default(
                    {
                        "nested": {
                            "path": join_field(split_field(frum.name)[1::])
                        }
                    }, es_query)
            }
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
            query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query,
                               select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start,
                                       query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start,
                                     query, select)

        output.meta.es_response_time = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", e)
コード例 #43
0
ファイル: query.py プロジェクト: klahnakoski/MoTreeherder
def _map_term_using_schema(master, path, term, schema_edges):
    """
    IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM
    """
    output = DictList()
    for k, v in term.items():
        dimension = schema_edges[k]
        if isinstance(dimension, Dimension):
            domain = dimension.getDomain()
            if dimension.fields:
                if isinstance(dimension.fields, Mapping):
                    # EXPECTING A TUPLE
                    for local_field, es_field in dimension.fields.items():
                        local_value = v[local_field]
                        if local_value == None:
                            output.append({"missing": {"field": es_field}})
                        else:
                            output.append({"term": {es_field: local_value}})
                    continue

                if len(dimension.fields) == 1 and is_keyword(
                        dimension.fields[0]):
                    # SIMPLE SINGLE-VALUED FIELD
                    if domain.getPartByKey(v) is domain.NULL:
                        output.append(
                            {"missing": {
                                "field": dimension.fields[0]
                            }})
                    else:
                        output.append({"term": {dimension.fields[0]: v}})
                    continue

                if AND(is_keyword(f) for f in dimension.fields):
                    # EXPECTING A TUPLE
                    if not isinstance(v, tuple):
                        Log.error("expecing {{name}}={{value}} to be a tuple",
                                  name=k,
                                  value=v)
                    for i, f in enumerate(dimension.fields):
                        vv = v[i]
                        if vv == None:
                            output.append({"missing": {"field": f}})
                        else:
                            output.append({"term": {f: vv}})
                    continue
            if len(dimension.fields) == 1 and is_keyword(dimension.fields[0]):
                if domain.getPartByKey(v) is domain.NULL:
                    output.append({"missing": {"field": dimension.fields[0]}})
                else:
                    output.append({"term": {dimension.fields[0]: v}})
                continue
            if domain.partitions:
                part = domain.getPartByKey(v)
                if part is domain.NULL or not part.esfilter:
                    Log.error("not expected to get NULL")
                output.append(part.esfilter)
                continue
            else:
                Log.error("not expected")
        elif isinstance(v, Mapping):
            sub = _map_term_using_schema(master, path + [k], v,
                                         schema_edges[k])
            output.append(sub)
            continue

        output.append({"term": {k: v}})
    return {"and": output}
コード例 #44
0
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT
    """
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_keyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_keyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max, {"gt": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_keyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max, {"gte": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_keyword(edge.value):
        # USE FAST ES SYNTAX
        if edge.domain.type in domains.ALGEBRAIC:
            output.range = {}
            output.range[edge.value] = {"gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max)}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                    # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output.term = {edge.value: partition.value}
            else:
                output.term = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output.term = dict()
            output.term[edge.value] = partition.value
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output.script = {"script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)}
        else:
            output.script = {"script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)}

        code = es09.expressions.addFunctions(output.script.script)
        output.script.script = code.head + code.body
        return output