Beispiel #1
0
def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Nest es_query under a "_match" range aggregation with one range per
    partition of the domain.

    When edge.allowNulls is set, a "_missing" filter aggregation is added as
    well; its filter is the negation of "value exists AND min <= value < max".

    :param edge: edge.value and edge.allowNulls are read
    :param domain: supplies min, max and partitions (each with min and max)
    :param es_query: aggregation added beneath both "_match" and "_missing"
    :param to_float: applied to every boundary before it is put in the request
    :param schema: resolves the ES column, or compiles the Painless script
    :return: result of adding the range aggregation to the Aggs container
    """
    # DOMAIN BOUNDS, COMBINED WITH THE PARTITION EXTREMES THROUGH coalesce
    lower_bound = coalesce(domain.min, MIN(domain.partitions.min))
    upper_bound = coalesce(domain.max, MAX(domain.partitions.max))

    result = Aggs()
    value = edge.value
    if edge.allowNulls:
        in_domain = AndOp([
            value.exists(),
            GteOp([value, Literal(to_float(lower_bound))]),
            LtOp([value, Literal(to_float(upper_bound))]),
        ]).partial_eval()
        missing = FilterAggs("_missing", NotOp(in_domain), self)
        result.add(missing.add(es_query))

    # A PLAIN VARIABLE MAPS TO AN ES FIELD, ANYTHING ELSE BECOMES A SCRIPT
    if is_op(value, Variable):
        leaf = first(schema.leaves(value.var))
        range_spec = {"field": leaf.es_column}
    else:
        script = Painless[value].to_es_script(schema)
        range_spec = {"script": text_type(script)}

    range_spec["ranges"] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    matched = RangeAggs("_match", range_spec, self)
    return result.add(matched.add(es_query))
Beispiel #2
0
def _range_composer(edge, domain, es_query, to_float, schema):
    """
    Build the {"aggs": ...} request fragment for a range-partitioned edge.

    "_match" is a range aggregation with one from/to pair per partition,
    merged with es_query through set_default.  "_missing" is None unless
    edge.allowNulls is set; then it is a filter aggregation whose filter is
    the negation of "value exists AND min <= value < max".

    :param edge: edge.value and edge.allowNulls are read
    :param domain: supplies min, max and partitions (each with min and max)
    :param es_query: aggregation merged into both "_match" and "_missing"
    :param to_float: applied to every boundary before it is put in the request
    :param schema: resolves the ES column, or compiles the Painless script
    :return: wrap()-ed dict with a single "aggs" property
    """
    # DOMAIN BOUNDS, COMBINED WITH THE PARTITION EXTREMES THROUGH coalesce
    lower_bound = coalesce(domain.min, MIN(domain.partitions.min))
    upper_bound = coalesce(domain.max, MAX(domain.partitions.max))

    missing_filter = None
    if edge.allowNulls:
        in_domain = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower_bound))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper_bound))]),
        ]).partial_eval()
        not_in_domain = NotOp("not", in_domain).to_esfilter(schema)
        missing_filter = set_default({"filter": not_in_domain}, es_query)

    # A PLAIN VARIABLE MAPS TO AN ES FIELD, ANYTHING ELSE BECOMES A SCRIPT
    if isinstance(edge.value, Variable):
        leaf = schema.leaves(edge.value.var)[0]
        range_spec = {"field": leaf.es_column}
    else:
        painless = edge.value.to_painless(schema)
        range_spec = {"script": painless.script(schema)}

    ranges = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]
    match = set_default(
        {"range": range_spec},
        {"range": {"ranges": ranges}},
        es_query
    )

    return wrap({"aggs": {"_match": match, "_missing": missing_filter}})
Beispiel #3
0
    def append_query(self, query_path, es_query):
        """
        Add one "_match<i>" filter aggregation per domain partition, each
        wrapping es_query.

        Partition i selects documents where
        edge.range.min <= partition.min AND edge.range.max > partition.min
        (both comparisons use the partition's min).

        :param query_path: not used by this implementation
        :param es_query: aggregation nested under every filter
        :return: Aggs holding all the per-partition filters
        """
        span = self.edge.range
        partitions = self.edge.domain.partitions

        result = Aggs()
        for index, part in enumerate(partitions):
            starts_by = LteOp([span.min, Literal(self.to_float(part.min))])
            ends_after = GtOp([span.max, Literal(self.to_float(part.min))])
            covers = AndOp([starts_by, ends_after])

            name = "_match" + text(index)
            result.add(FilterAggs(name, covers, self).add(es_query))

        return result
Beispiel #4
0
    def append_query(self, query_path, es_query):
        """
        Add one "_match<i>" filter aggregation per domain partition, each
        wrapping es_query.

        Partition i selects documents where
        edge.range.min <= partition.min AND edge.range.max > partition.min
        (both comparisons use the partition's min).

        :param query_path: not used by this implementation
        :param es_query: aggregation nested under every filter
        :return: Aggs holding all the per-partition filters
        """
        span = self.edge.range
        partitions = self.edge.domain.partitions

        result = Aggs()
        for index, part in enumerate(partitions):
            starts_by = InequalityOp(
                "lte", [span.min, Literal("literal", self.to_float(part.min))]
            )
            ends_after = InequalityOp(
                "gt", [span.max, Literal("literal", self.to_float(part.min))]
            )
            covers = AndOp("and", [starts_by, ends_after])

            name = "_match" + text_type(index)
            result.add(FilterAggs(name, covers, self).add(es_query))

        return result
Beispiel #5
0
    def append_query(self, query_path, es_query):
        """
        Build the aggregations for an edge whose domain is an explicit set of
        partition keys.

        A "_filter" aggregation restricts documents to values found in the
        domain, and beneath it a "_match" terms aggregation (on the ES field
        for a plain Variable, on a painless script otherwise) wraps es_query.

        When edge.allowNulls is set, one nested aggregation is added for every
        path in schema.query_path: "_missing0" (value not in the domain) for
        the path equal to query_path, "_missing1" (exists-marker column is
        missing) for every other path.

        :param query_path: path compared against each entry of schema.query_path
        :param es_query: aggregation nested under every match/missing branch
        :return: Aggs holding the composed aggregations
        """
        domain = self.domain
        key = domain.key
        value = self.edge.value
        convert = pull_functions[value.type]
        include = tuple(convert(part[key]) for part in domain.partitions)

        in_domain = AndOp("and", [
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(self.query.frum.schema.leaves(value.var))
            terms = {
                "field": leaf.es_column,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            script = value.to_es_script(self.schema).script(self.schema)
            terms = {
                "script": {
                    "lang": "painless",
                    "inline": script
                },
                "size": limit
            }
        match = TermsAggs("_match", terms, self)
        output = Aggs().add(
            FilterAggs("_filter", in_domain, None).add(match.add(es_query))
        )

        if not self.edge.allowNulls:
            return output

        # FIND NULLS AT EACH NESTED LEVEL
        for path in self.schema.query_path:
            if path == query_path:
                # MISSING AT THE QUERY DEPTH
                missing = FilterAggs("_missing0", NotOp(None, in_domain), self)
                output.add(NestedAggs(path).add(missing.add(es_query)))
                continue

            # PARENT HAS NO CHILDREN, SO MISSING
            column = first(self.schema.values(query_path, (OBJECT, EXISTS)))
            exists_marker = Variable(
                column.es_column.replace(NESTED_TYPE, EXISTS_TYPE)
            )
            missing = FilterAggs(
                "_missing1",
                NotOp(None, ExistsOp(None, exists_marker)),
                self
            )
            output.add(
                NestedAggs(column.nested_path[0]).add(missing.add(es_query))
            )
        return output
Beispiel #6
0
    def append_query(self, es_query, start):
        """
        Build one "_join_<i>" filter aggregation per domain partition, each
        merged with es_query through set_default.

        Partition i selects documents where
        edge.range.min <= partition.min AND edge.range.max > partition.min
        (both comparisons use the partition's min).

        :param es_query: aggregation merged into every filter
        :param start: stored on self.start
        :return: wrap()-ed dict with a single "aggs" property
        """
        self.start = start

        span = self.edge.range
        partitions = self.edge.domain.partitions

        joins = {}
        for index, part in enumerate(partitions):
            starts_by = InequalityOp(
                "lte", [span.min, Literal("literal", self.to_float(part.min))]
            )
            ends_after = InequalityOp(
                "gt", [span.max, Literal("literal", self.to_float(part.min))]
            )
            covers = AndOp("and", [starts_by, ends_after])

            joins["_join_" + text_type(index)] = set_default(
                {"filter": covers.to_esfilter(self.schema)},
                es_query
            )

        return wrap({"aggs": joins})
Beispiel #7
0
    def append_query(self, es_query, start):
        """
        Build the {"aggs": ...} request fragment for an edge whose domain is
        an explicit set of partition keys.

        "_match" filters documents to those whose value exists and is in the
        domain; beneath it "_filter" is a terms aggregation (on the ES field
        for a plain Variable, on a painless script otherwise) merged with
        es_query.  "_missing" is None unless edge.allowNulls is set; then it is
        a filter aggregation on the negation of the "_match" filter.

        :param es_query: aggregation merged into the terms and missing branches
        :param start: stored on self.start
        :return: wrap()-ed dict with a single "aggs" property
        """
        self.start = start
        domain = self.domain

        domain_key = domain.key
        # NUMBERS ARE NORMALIZED TO float, EVERYTHING ELSE IS PASSED THROUGH.
        # BUILT WITH A PLAIN GENERATOR: UNPACKING zip(*...) RAISES ValueError
        # WHEN THE DOMAIN HAS NO PARTITIONS
        include = tuple(
            float(v) if isinstance(v, (int, float)) else v
            for v in (p[domain_key] for p in domain.partitions)
        )
        value = self.edge.value
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": {
                    "lang": "painless",
                    "inline": value.to_painless(self.schema).script(self.schema)
                },
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            # EVERYTHING THAT DID NOT PASS THE "_match" FILTER
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
        else:
            missing = None

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})
Beispiel #8
0
def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Nest es_query under a "_match" range aggregation with one range per
    partition of the domain.

    When edge.allowNulls is set, a "_missing" filter aggregation is added as
    well; its filter is the negation of "value exists AND min <= value < max".

    :param edge: edge.value and edge.allowNulls are read
    :param domain: supplies min, max and partitions (each with min and max)
    :param es_query: aggregation added beneath both "_match" and "_missing"
    :param to_float: applied to every boundary before it is put in the request
    :param schema: resolves the ES column, or compiles the script
    :return: result of adding the range aggregation to the Aggs container
    """
    # DOMAIN BOUNDS, COMBINED WITH THE PARTITION EXTREMES THROUGH coalesce
    lower_bound = coalesce(domain.min, MIN(domain.partitions.min))
    upper_bound = coalesce(domain.max, MAX(domain.partitions.max))

    result = Aggs()
    if edge.allowNulls:
        in_domain = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower_bound))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper_bound))]),
        ]).partial_eval()
        missing = FilterAggs("_missing", NotOp("not", in_domain), self)
        result.add(missing.add(es_query))

    # A PLAIN VARIABLE MAPS TO AN ES FIELD, ANYTHING ELSE BECOMES A SCRIPT
    if isinstance(edge.value, Variable):
        leaf = first(schema.leaves(edge.value.var))
        range_spec = {"field": leaf.es_column}
    else:
        compiled = edge.value.to_es_script(schema)
        range_spec = {"script": compiled.script(schema)}

    range_spec["ranges"] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    matched = RangeAggs("_match", range_spec, self)
    return result.add(matched.add(es_query))
Beispiel #9
0
    def append_query(self, query_path, es_query):
        """
        Build the aggregations for an edge whose domain is an explicit set of
        partition keys.

        A "_filter" aggregation restricts documents to values found in the
        domain, and beneath it a "_match" terms aggregation (on the ES field
        for a plain Variable, on a Painless script otherwise) wraps es_query.

        When edge.allowNulls is set, the negated domain filter is passed to
        split_expression; for each resulting term, es_query is wrapped in a
        nested "_missing<i>" filter for every nest whose where-clause is not
        TRUE, and the result is added to the output.

        :param query_path: not used by this implementation
        :param es_query: aggregation nested under every match/missing branch
        :return: Aggs holding the composed aggregations
        """
        domain = self.domain
        key = domain.key
        value = self.edge.value
        convert = pull_functions[value.type]
        include = tuple(convert(part[key]) for part in domain.partitions)

        schema = self.schema
        in_domain = InOp([value, Literal(include)]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if is_op(value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(schema.leaves(value.var))
            terms = {
                "field": leaf.es_column,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms = {
                "script": text(Painless[value].to_es_script(schema)),
                "size": limit
            }
        match = TermsAggs("_match", terms, self)
        output = Aggs().add(
            FilterAggs("_filter", in_domain, None).add(match.add(es_query))
        )

        if not self.edge.allowNulls:
            return output

        # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
        # MISSING AT THE QUERY DEPTH
        not_in_domain = split_expression(NotOp(in_domain), self.query)
        for index, term in enumerate(not_in_domain.terms):
            name = "_missing" + text(index)
            nested = es_query
            for nest in term.nests:
                if nest.where is TRUE:
                    continue
                missing = FilterAggs(name, nest.where, self)
                nested = NestedAggs(nest.path.var).add(missing.add(nested))
            output.add(nested)
        return output
Beispiel #10
0
    def append_query(self, query_path, es_query):
        """
        Build the aggregations for an edge whose domain is an explicit set of
        partition keys.

        A "_filter" aggregation restricts documents to values found in the
        domain, and beneath it a "_match" terms aggregation (on the ES field
        for a plain Variable, on a script otherwise) wraps es_query.

        When edge.allowNulls is set, the negated domain filter is split with
        split_expression_by_path; walking the paths in descending sorted
        order, every path with a truthy expression gets a nested
        "_missing<i>" filter aggregation wrapping es_query.

        :param query_path: not used by this implementation
        :param es_query: aggregation nested under every match/missing branch
        :return: Aggs holding the composed aggregations
        """
        domain = self.domain
        key = domain.key
        value = Painless[self.edge.value]
        convert = pull_functions[value.type]
        include = tuple(convert(part[key]) for part in domain.partitions)

        schema = self.schema
        membership = InOp([value, Literal(include)])
        in_domain = Painless[AndOp([membership])].partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if is_op(value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(schema.leaves(value.var))
            terms = {
                "field": leaf.es_column,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms = {
                "script": text(value.to_es_script(schema)),
                "size": limit
            }
        match = TermsAggs("_match", terms, self)
        output = Aggs().add(
            FilterAggs("_filter", in_domain, None).add(match.add(es_query))
        )

        if not self.edge.allowNulls:
            return output

        # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
        # MISSING AT THE QUERY DEPTH
        _, by_path = split_expression_by_path(NotOp(in_domain), schema)
        for index, path in enumerate(sorted(by_path.keys(), reverse=True)):
            expression = by_path.get(path)
            if not expression:
                continue
            missing = FilterAggs("_missing" + text(index), expression, self)
            output.add(NestedAggs(path).add(missing.add(es_query)))
        return output