Example #1
0
    def __init__(self, edge, query, limit):
        """
        Set up decoder state from the edge's domain and its compiled ES script.

        :param edge: edge being decoded; its domain is adopted and the domain's limit is overwritten
        :param query: query supplying a fallback limit and the sort clauses
        :param limit: forwarded to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), PASSED THROUGH Math.min WITH MAX_LIMIT
        requested_limit = coalesce(self.domain.limit, query.limit, 10)
        self.domain.limit = Math.min(requested_limit, MAX_LIMIT)
        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        es_script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = es_script
        self.pull = pull_functions[es_script.data_type]
        self.missing = es_script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        if not matching_sorts:
            self.es_order = None
        else:
            # ONLY THE FIRST MATCHING SORT CLAUSE DECIDES THE DIRECTION
            direction = {1: "asc", -1: "desc"}[matching_sorts[0].sort]
            self.es_order = {"_term": direction}
Example #2
0
    def append_query(self, query_path, es_query):
        """
        Wrap es_query in the aggregations that group by this edge's known domain partitions.

        :param query_path: path compared against each entry of self.schema.query_path
        :param es_query: aggregation added beneath every bucket built here
        :return: Aggs holding the "_filter"/"_match" pair, plus "_missing0"/"_missing1"
                 filters when the edge allows nulls
        """
        edge_value = self.edge.value
        partitions = self.domain.partitions
        key_name = self.domain.key
        to_native = pull_functions[edge_value.type]
        allowed = tuple(to_native(part[key_name]) for part in partitions)

        in_domain = AndOp("and", [
            InOp("in", [edge_value, Literal("literal", allowed)])
        ]).partial_eval()

        size = coalesce(self.limit, len(partitions))

        if isinstance(edge_value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(self.query.frum.schema.leaves(edge_value.var))
            terms_spec = {
                "field": leaf.es_column,
                "size": size,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms_spec = {
                "script": {
                    "lang": "painless",
                    "inline": edge_value.to_es_script(self.schema).script(self.schema)
                },
                "size": size
            }
        match = TermsAggs("_match", terms_spec, self)
        output = Aggs().add(FilterAggs("_filter", in_domain, None).add(match.add(es_query)))

        if not self.edge.allowNulls:
            return output

        # FIND NULLS AT EACH NESTED LEVEL
        for path in self.schema.query_path:
            if path == query_path:
                # MISSING AT THE QUERY DEPTH
                nested = NestedAggs(path)
                outside_domain = FilterAggs("_missing0", NotOp(None, in_domain), self)
                output.add(nested.add(outside_domain.add(es_query)))
                continue

            # PARENT HAS NO CHILDREN, SO MISSING
            column = first(self.schema.values(query_path, (OBJECT, EXISTS)))
            nested = NestedAggs(column.nested_path[0])
            exists_marker = Variable(column.es_column.replace(NESTED_TYPE, EXISTS_TYPE))
            childless = FilterAggs("_missing1", NotOp(None, ExistsOp(None, exists_marker)), self)
            output.add(nested.add(childless.add(es_query)))
        return output
Example #3
0
    def append_query(self, es_query, start):
        """
        Build a "filters" aggregation with one filter per domain partition.

        :param es_query: passed to set_default alongside each aggregation built here
        :param start: remembered as self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}; "_missing" is None
                 unless the edge allows nulls
        """
        self.start = start

        # EACH PARTITION IS ITS OWN where AND-ED WITH THE NEGATION OF EVERY EARLIER where
        es_filters = []
        negated_so_far = []
        for part in self.edge.domain.partitions:
            condition = part.where
            es_filters.append(AndOp("and", [condition] + negated_so_far).to_esfilter(self.schema))
            negated_so_far.append(NotOp("not", condition))

        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        if self.edge.allowNulls:
            # A DOCUMENT IS "MISSING" WHEN NO PARTITION CONDITION HOLDS
            unmatched = {"filter": AndOp("and", negated_so_far).to_esfilter(self.schema)}
            missing_filter = set_default(unmatched, es_query)
        else:
            missing_filter = None

        matched = set_default({"filters": {"filters": es_filters}}, es_query)
        return wrap({"aggs": {"_match": matched, "_missing": missing_filter}})
Example #4
0
def _range_composer(edge, domain, es_query, to_float, schema):
    """
    Compose a "range" aggregation covering the domain's partitions.

    :param edge: edge whose value is bucketed; edge.allowNulls enables the "_missing" filter
    :param domain: supplies min/max and the partitions (each with min and max)
    :param es_query: passed to set_default alongside each aggregation built here
    :param to_float: converter applied to every boundary value
    :param schema: used to resolve the ES column, or to render the script
    :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}
    """
    # USE RANGES
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    missing_filter = None
    if edge.allowNulls:
        # "MISSING" IS ANYTHING THAT DOES NOT EXIST, OR FALLS OUTSIDE [lower, upper)
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))])
        ]).partial_eval()
        missing_filter = set_default(
            {"filter": NotOp("not", in_range).to_esfilter(schema)},
            es_query
        )

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_painless(schema).script(schema)}

    ranges = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]
    matched = set_default({"range": calc}, {"range": {"ranges": ranges}}, es_query)
    return wrap({"aggs": {"_match": matched, "_missing": missing_filter}})
Example #5
0
def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Compose a RangeAggs covering the domain's partitions.

    :param self: decoder handed to every aggregation object built here
    :param edge: edge whose value is bucketed; edge.allowNulls enables the "_missing" filter
    :param domain: supplies min/max and the partitions (each with min and max)
    :param es_query: aggregation added beneath every bucket built here
    :param to_float: converter applied to every boundary value
    :param schema: used to resolve the ES column, or to render the script
    :return: result of adding the "_match" RangeAggs to the Aggs container
    """
    # USE RANGES
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    output = Aggs()
    if edge.allowNulls:
        # "MISSING" IS ANYTHING THAT DOES NOT EXIST, OR FALLS OUTSIDE [lower, upper)
        in_range = AndOp([
            edge.value.exists(),
            GteOp([edge.value, Literal(to_float(lower))]),
            LtOp([edge.value, Literal(to_float(upper))])
        ]).partial_eval()
        out_of_range = FilterAggs("_missing", NotOp(in_range), self)
        output.add(out_of_range.add(es_query))

    if is_op(edge.value, Variable):
        calc = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        calc = {"script": text_type(Painless[edge.value].to_es_script(schema))}
    calc['ranges'] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    matched = RangeAggs("_match", calc, self)
    return output.add(matched.add(es_query))
Example #6
0
    def append_query(self, es_query, start):
        """
        Build the ES aggregation that groups by this edge's known domain keys.

        :param es_query: passed to set_default alongside each aggregation built here
        :param start: remembered as self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}; "_missing" is None
                 unless the edge allows nulls
        """
        self.start = start
        domain = self.domain
        domain_key = domain.key
        value = self.edge.value

        # NUMERIC KEYS ARE NORMALISED TO float; OTHER KEYS PASS THROUGH UNCHANGED.
        # BUILT DIRECTLY (NOT BY UNPACKING zip(*...)) SO A DOMAIN WITH NO PARTITIONS
        # GIVES AN EMPTY TUPLE INSTEAD OF RAISING ValueError
        include = tuple(
            float(v) if isinstance(v, (int, float)) else v
            for v in (p[domain_key] for p in domain.partitions)
        )
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": {
                    "lang": "painless",
                    "inline": value.to_painless(self.schema).script(self.schema)
                },
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            # EVERYTHING NOT IN THE DOMAIN IS COUNTED AS MISSING
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
        else:
            missing = None

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})
Example #7
0
    def append_query(self, query_path, es_query):
        """
        Build a FiltersAggs with one filter per domain partition.

        :param query_path: not used by this implementation
        :param es_query: aggregation added beneath every bucket built here
        :return: Aggs holding "_match", plus "_missing" when the edge allows nulls
        """
        # EACH PARTITION IS ITS OWN where AND-ED WITH THE NEGATION OF EVERY EARLIER where
        filters = []
        negated_so_far = []
        for part in self.edge.domain.partitions:
            condition = part.where
            filters.append(AndOp([condition] + negated_so_far))
            negated_so_far.append(NotOp(condition))

        output = Aggs().add(FiltersAggs("_match", filters, self).add(es_query))

        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        if self.edge.allowNulls:
            # A DOCUMENT IS "MISSING" WHEN NO PARTITION CONDITION HOLDS
            unmatched = FilterAggs("_missing", AndOp(negated_so_far), self)
            output.add(unmatched.add(es_query))

        return output
Example #8
0
    def append_query(self, query_path, es_query):
        """
        Nest es_query inside one terms/missing pair per field in self.fields.

        :param query_path: not used by this implementation
        :param es_query: innermost aggregation; each field wraps the result of the previous one
        :return: the outermost aggregation, wrapped in a "_filter" FilterAggs when
                 the domain has a where clause
        """
        inner = es_query
        owner = self  # ONLY THE FIRST LEVEL BUILT IS TIED TO THIS DECODER
        for field in self.fields:
            has_value = field.exists().partial_eval()
            terms_spec = {
                "field": first(self.schema.leaves(field.var)).es_column,
                "size": self.domain.limit
            }
            level = Aggs()
            level.add(TermsAggs("_match", terms_spec, owner).add(inner))
            level.add(FilterAggs("_missing", NotOp(has_value), owner).add(inner))
            inner = level
            owner = None

        where = self.domain.where
        if where:
            return FilterAggs("_filter", where, None).add(inner)
        return inner
Example #9
0
    def append_query(self, query_path, es_query):
        """
        Wrap es_query in the aggregations that group by this edge's known domain partitions.

        :param query_path: not used by this implementation
        :param es_query: aggregation added beneath every bucket built here
        :return: Aggs holding the "_filter"/"_match" pair, plus one entry per term of the
                 split "not in domain" expression when the edge allows nulls
        """
        schema = self.schema
        edge_value = self.edge.value
        partitions = self.domain.partitions
        key_name = self.domain.key
        to_native = pull_functions[edge_value.type]
        allowed = tuple(to_native(part[key_name]) for part in partitions)

        in_domain = InOp([edge_value, Literal(allowed)]).partial_eval()
        size = coalesce(self.limit, len(partitions))

        if is_op(edge_value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(schema.leaves(edge_value.var))
            terms_spec = {
                "field": leaf.es_column,
                "size": size,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms_spec = {
                "script": text(Painless[edge_value].to_es_script(schema)),
                "size": size
            }
        match = TermsAggs("_match", terms_spec, self)
        output = Aggs().add(FilterAggs("_filter", in_domain, None).add(match.add(es_query)))

        if not self.edge.allowNulls:
            return output

        # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
        # MISSING AT THE QUERY DEPTH
        concat_inner = split_expression(NotOp(in_domain), self.query)
        for index, term in enumerate(concat_inner.terms):
            acc = es_query
            for nest in term.nests:
                if nest.where is TRUE:
                    # NOTHING TO FILTER AT THIS NESTING LEVEL
                    continue
                level_filter = FilterAggs("_missing" + text(index), nest.where, self)
                acc = NestedAggs(nest.path.var).add(level_filter.add(acc))
            output.add(acc)
        return output
Example #10
0
    def __init__(self, edge, query, limit):
        """
        Set up decoder state from the edge's domain and its compiled ES script.

        :param edge: edge being decoded; its domain is adopted and the domain's limit is overwritten
        :param query: query supplying a fallback limit and the sort clauses
        :param limit: forwarded to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        domain = edge.domain
        self.domain = domain
        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), PASSED THROUGH Math.min WITH MAX_LIMIT
        domain.limit = Math.min(
            coalesce(domain.limit, query.limit, 10),
            MAX_LIMIT
        )

        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        compiled = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = compiled
        self.pull = pull_functions[compiled.data_type]
        self.missing = compiled.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        edge_sorts = [
            clause
            for clause in self.query.sort
            if clause.value == self.edge.value
        ]
        if edge_sorts:
            # ONLY THE FIRST MATCHING SORT CLAUSE DECIDES THE DIRECTION
            term_direction = {1: "asc", -1: "desc"}[edge_sorts[0].sort]
            self.es_order = {"_term": term_direction}
        else:
            self.es_order = None
Example #11
0
    def append_query(self, query_path, es_query):
        """
        Wrap es_query in the aggregations that group by this edge's known domain partitions.

        :param query_path: not used by this implementation
        :param es_query: aggregation added beneath every bucket built here
        :return: Aggs holding the "_filter"/"_match" pair, plus one nested "_missing<i>"
                 filter per non-empty path expression when the edge allows nulls
        """
        schema = self.schema
        value = Painless[self.edge.value]
        partitions = self.domain.partitions
        key_name = self.domain.key
        to_native = pull_functions[value.type]
        allowed = tuple(to_native(part[key_name]) for part in partitions)

        membership = InOp([value, Literal(allowed)])
        in_domain = Painless[AndOp([membership])].partial_eval()

        size = coalesce(self.limit, len(partitions))

        if is_op(value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            leaf = first(schema.leaves(value.var))
            terms_spec = {
                "field": leaf.es_column,
                "size": size,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms_spec = {
                "script": text(value.to_es_script(schema)),
                "size": size
            }
        match = TermsAggs("_match", terms_spec, self)
        output = Aggs().add(FilterAggs("_filter", in_domain, None).add(match.add(es_query)))

        if not self.edge.allowNulls:
            return output

        # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
        # MISSING AT THE QUERY DEPTH
        _, by_path = split_expression_by_path(NotOp(in_domain), schema)
        paths_descending = reversed(sorted(by_path.keys()))
        for index, path in enumerate(paths_descending):
            expression = by_path.get(path)
            if not expression:
                # NO CONDITION FOR THIS PATH; THE INDEX IS STILL CONSUMED
                continue
            nested = NestedAggs(path)
            path_filter = FilterAggs("_missing" + text(index), expression, self)
            output.add(nested.add(path_filter.add(es_query)))
        return output
Example #12
0
    def append_query(self, es_query, start):
        """
        Nest es_query inside one filtered terms aggregation per field in self.fields.

        :param es_query: innermost aggregation; each field wraps the result of the previous one
        :param start: remembered as self.start
        :return: the outermost aggregation, wrapped in a "_filter" aggregation when
                 the domain has a where clause
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start
        inner = es_query
        for field in self.fields:
            has_value = field.exists().partial_eval()
            match_filter = has_value.to_esfilter(self.schema)
            terms = set_default(
                {"terms": {
                    "field": self.schema.leaves(field.var)[0].es_column,
                    "size": self.domain.limit
                }},
                inner
            )
            level = wrap({"aggs": {
                "_match": {
                    "filter": match_filter,
                    "aggs": {"_filter": terms}
                }
            }})
            if self.edge.allowNulls:
                level.aggs._missing = set_default(
                    {"filter": NotOp("not", has_value).to_esfilter(self.schema)},
                    inner
                )
            inner = level

        domain_where = self.domain.where
        if not domain_where:
            return inner

        filter_ = domain_where.partial_eval().to_esfilter(self.schema)
        return {
            "aggs": {
                "_filter": set_default({"filter": filter_}, inner)
            }
        }
Example #13
0
def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Compose a RangeAggs covering the domain's partitions.

    :param self: decoder handed to every aggregation object built here
    :param edge: edge whose value is bucketed; edge.allowNulls enables the "_missing" filter
    :param domain: supplies min/max and the partitions (each with min and max)
    :param es_query: aggregation added beneath every bucket built here
    :param to_float: converter applied to every boundary value
    :param schema: used to resolve the ES column, or to render the script
    :return: result of adding the "_match" RangeAggs to the Aggs container
    """
    # USE RANGES
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    output = Aggs()
    if edge.allowNulls:
        # "MISSING" IS ANYTHING THAT DOES NOT EXIST, OR FALLS OUTSIDE [lower, upper)
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))])
        ]).partial_eval()
        out_of_range = FilterAggs("_missing", NotOp("not", in_range), self)
        output.add(out_of_range.add(es_query))

    if isinstance(edge.value, Variable):
        calc = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        calc = {"script": edge.value.to_es_script(schema).script(schema)}
    calc['ranges'] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    matched = RangeAggs("_match", calc, self)
    return output.add(matched.add(es_query))
Example #14
0
    def append_query(self, es_query, start):
        """
        Build a terms aggregation over the edge value, with a "_missing" companion filter.

        :param es_query: passed to set_default alongside each aggregation built here
        :param start: remembered as self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}
        """
        self.start = start

        edge_value = self.edge.value
        script = edge_value.partial_eval().to_painless(self.schema)
        exists = NotOp("not", script.miss).partial_eval()

        if not isinstance(edge_value, Variable):
            # SCRIPTED VALUE: FILTER TO EXISTING VALUES, THEN GROUP BY THE SCRIPT RESULT
            match_filter = exists.to_esfilter(self.schema)
            script_terms = {
                "script": {
                    "lang": "painless",
                    "inline": script.expr
                },
                "size": self.domain.limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }
            matched = {
                "filter": match_filter,
                "aggs": {
                    "_filter": set_default({"terms": script_terms}, es_query)
                }
            }
        else:
            # PLAIN COLUMN: GROUP BY THE FIELD; ORDER BY TERM ONLY WHEN THE QUERY SORTS ON IT
            field_terms = {
                "field": self.schema.leaves(edge_value.var)[0].es_column,
                "size": self.domain.limit
            }
            if edge_value.var in [s.value.var for s in self.query.sort]:
                sort_dir = [s.sort for s in self.query.sort if s.value.var == edge_value.var][0]
                field_terms["order"] = {"_term": "asc" if sort_dir == 1 else "desc"}
            matched = set_default({"terms": field_terms}, es_query)

        missing = set_default(
            {"filter": NotOp("not", exists).to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {"_match": matched, "_missing": missing}})
Example #15
0
class DefaultDecoder(SetDecoder):
    # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    def __init__(self, edge, query, limit):
        """
        Set up decoder state from the edge's domain and its compiled ES script.

        :param edge: edge being decoded; its domain is adopted and the domain's limit is overwritten
        :param query: query supplying a fallback limit and the sort clauses
        :param limit: forwarded to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), PASSED THROUGH Math.min WITH MAX_LIMIT
        requested_limit = coalesce(self.domain.limit, query.limit, 10)
        self.domain.limit = Math.min(requested_limit, MAX_LIMIT)
        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        es_script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = es_script
        self.pull = pull_functions[es_script.data_type]
        self.missing = es_script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        if not matching_sorts:
            self.es_order = None
        else:
            # ONLY THE FIRST MATCHING SORT CLAUSE DECIDES THE DIRECTION
            direction = {1: "asc", -1: "desc"}[matching_sorts[0].sort]
            self.es_order = {"_term": direction}

    def append_query(self, es_query, start):
        """
        Build a terms aggregation over the edge value.

        :param es_query: passed to set_default alongside each aggregation built here
        :param start: remembered as self.start
        :return: wrapped {"aggs": {...}} with "_match" and, except when a scripted
                 value always exists, "_missing"
        """
        self.start = start
        edge_value = self.edge.value

        if isinstance(edge_value, Variable):
            # PLAIN COLUMN: GROUP BY THE FIELD DIRECTLY
            field_terms = {
                "terms": {
                    "field": self.schema.leaves(edge_value.var)[0].es_column,
                    "size": self.domain.limit,
                    "order": self.es_order
                }
            }
            return wrap({
                "aggs": {
                    "_match": set_default(field_terms, es_query),
                    "_missing": set_default(
                        {"filter": self.missing.to_esfilter(self.schema)},
                        es_query
                    )
                }
            })

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            script_terms = {
                "terms": {
                    "script": {
                        "lang": "painless",
                        "inline": self.script.expr
                    },
                    "size": self.domain.limit,
                    "order": self.es_order
                }
            }
            return wrap({"aggs": {"_match": set_default(script_terms, es_query)}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        script_terms = {
            "terms": {
                "script": {
                    "lang": "painless",
                    "inline": self.script.expr
                },
                "size": self.domain.limit,
                "order": self.es_order
            }
        }
        return wrap({
            "aggs": {
                "_match": {
                    "filter": exists_filter,
                    "aggs": {"_filter": set_default(script_terms, es_query)}
                },
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }
        })

    def count(self, row):
        """
        Record the key of this row's bucket, or enable nulls when the bucket has no key.

        :param row: indexable by self.start; the entry has 'doc_count' and may have 'key'
        """
        bucket = row[self.start]
        if not bucket['doc_count']:
            return

        key = bucket.get('key')
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with one built from the counted keys.
        """
        counted_domain = SimpleSetDomain(
            partitions=jx.sort(set(self.parts)))
        self.edge.domain = counted_domain
        self.domain = counted_domain
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for this row's bucket key.

        :param row: indexable by self.start; the entry exposes get('key')
        :return: index from the computed domain, or else the position at which the
                 key was first seen by this method
        """
        try:
            bucket = row[self.start]
            key = self.pull(bucket.get('key'))

            if self.computed_domain:
                return self.domain.getIndexByKey(key)

            # NO COMPUTED DOMAIN: ASSIGN INDEXES IN ORDER OF FIRST APPEARANCE
            index = self.key2index.get(key)
            if index is None:
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports one column."""
        return 1
Example #16
0
class DefaultDecoder(SetDecoder):
    # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    def __init__(self, edge, query, limit):
        """
        Set up decoder state from the edge's domain and its compiled ES script.

        :param edge: edge being decoded; its domain is adopted and the domain's limit is overwritten
        :param query: query supplying a fallback limit and the sort clauses
        :param limit: forwarded to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        domain = edge.domain
        self.domain = domain
        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), PASSED THROUGH Math.min WITH MAX_LIMIT
        domain.limit = Math.min(
            coalesce(domain.limit, query.limit, 10),
            MAX_LIMIT
        )

        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        compiled = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = compiled
        self.pull = pull_functions[compiled.data_type]
        self.missing = compiled.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        edge_sorts = [
            clause
            for clause in self.query.sort
            if clause.value == self.edge.value
        ]
        if edge_sorts:
            # ONLY THE FIRST MATCHING SORT CLAUSE DECIDES THE DIRECTION
            term_direction = {1: "asc", -1: "desc"}[edge_sorts[0].sort]
            self.es_order = {"_term": term_direction}
        else:
            self.es_order = None

    def append_query(self, es_query, start):
        """
        Build a terms aggregation over the edge value.

        :param es_query: passed to set_default alongside each aggregation built here
        :param start: remembered as self.start
        :return: wrapped {"aggs": {...}} with "_match" and, except when a scripted
                 value always exists, "_missing"
        """
        self.start = start
        edge_value = self.edge.value

        if isinstance(edge_value, Variable):
            # PLAIN COLUMN: GROUP BY THE FIELD DIRECTLY
            by_field = {"terms": {
                "field": self.schema.leaves(edge_value.var)[0].es_column,
                "size": self.domain.limit,
                "order": self.es_order
            }}
            return wrap({"aggs": {
                "_match": set_default(by_field, es_query),
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }})

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            by_script = {"terms": {
                "script": {"lang": "painless", "inline": self.script.expr},
                "size": self.domain.limit,
                "order": self.es_order
            }}
            return wrap({"aggs": {"_match": set_default(by_script, es_query)}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        by_script = {"terms": {
            "script": {"lang": "painless", "inline": self.script.expr},
            "size": self.domain.limit,
            "order": self.es_order
        }}
        return wrap({"aggs": {
            "_match": {
                "filter": exists_filter,
                "aggs": {"_filter": set_default(by_script, es_query)}
            },
            "_missing": set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
        }})

    def count(self, row):
        """
        Record the key of this row's bucket, or enable nulls when the bucket has no key.

        :param row: indexable by self.start; the entry has 'doc_count' and may have 'key'
        """
        bucket = row[self.start]
        if not bucket['doc_count']:
            return

        key = bucket.get('key')
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with one built from the counted keys.
        """
        counted_domain = SimpleSetDomain(
            partitions=jx.sort(set(self.parts))
        )
        self.edge.domain = counted_domain
        self.domain = counted_domain
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for this row's bucket key.

        :param row: indexable by self.start; the entry exposes get('key')
        :return: index from the computed domain, or else the position at which the
                 key was first seen by this method
        """
        try:
            bucket = row[self.start]
            key = self.pull(bucket.get('key'))

            if self.computed_domain:
                return self.domain.getIndexByKey(key)

            # NO COMPUTED DOMAIN: ASSIGN INDEXES IN ORDER OF FIRST APPEARANCE
            index = self.key2index.get(key)
            if index is None:
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports one column."""
        return 1