예제 #1
0
class DefaultDecoder(SetDecoder):
    """
    Decoder for the `default` domain type, where the set of values is not
    known when the query is written and is collected from the response rows.
    """

    def __init__(self, edge, query, limit):
        """
        Cap the domain limit, compile the edge value to an ES script, and
        work out whether the terms should be ordered by term.
        """
        AggsDecoder.__init__(self, edge, query, limit)
        domain = edge.domain
        self.domain = domain
        # FIRST AVAILABLE OF: DOMAIN LIMIT, QUERY LIMIT, 10; NEVER ABOVE MAX_LIMIT
        domain.limit = Math.min(
            coalesce(domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = script
        self.pull = pull_functions[script.data_type]
        self.missing = script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        matched = [s for s in self.query.sort if s.value == self.edge.value]
        if not matched:
            self.es_order = None
        else:
            term_direction = {1: "asc", -1: "desc"}
            self.es_order = {"_term": term_direction[matched[0].sort]}

    def append_query(self, es_query, start):
        """
        Remember `start` (the row position this decoder reads from) and
        return the wrapped aggregation that nests `es_query` under `_match`
        (and under `_missing`, unless every document has a value).
        """
        self.start = start
        edge_value = self.edge.value

        if isinstance(edge_value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE FIRST LEAF COLUMN
            field_terms = {
                "field": self.schema.leaves(edge_value.var)[0].es_column,
                "size": self.domain.limit,
                "order": self.es_order
            }
            return wrap({
                "aggs": {
                    "_match": set_default({"terms": field_terms}, es_query),
                    "_missing": set_default(
                        {"filter": self.missing.to_esfilter(self.schema)},
                        es_query)
                }
            })

        def script_bucket():
            # TERMS AGGREGATION OVER THE PAINLESS SCRIPT, WITH es_query AS DEFAULTS
            script_terms = {
                "script": {
                    "lang": "painless",
                    "inline": self.script.expr
                },
                "size": self.domain.limit,
                "order": self.es_order
            }
            return set_default({"terms": script_terms}, es_query)

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            return wrap({"aggs": {"_match": script_bucket()}})

        return wrap({
            "aggs": {
                # _match AND _filter REVERSED SO _match LINES UP WITH _missing
                "_match": {
                    "filter": self.exists.to_esfilter(self.schema),
                    "aggs": {"_filter": script_bucket()}
                },
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query)
            }
        })

    def count(self, row):
        """
        Record the key of a non-empty bucket; a non-empty bucket without a
        key switches on `allowNulls` for the edge instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` (NOT `is not None`) IS KEPT ON PURPOSE: THE TWO DIFFER FOR
        # OBJECTS THAT DEFINE THEIR OWN EQUALITY
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a
        SimpleSetDomain built from the sorted, de-duplicated keys seen by
        `count`.
        """
        partitions = jx.sort(set(self.parts))
        self.edge.domain = self.domain = SimpleSetDomain(
            partitions=partitions)
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for the key found in `row`.

        Once the domain has been computed the index comes from the domain;
        before that, indexes are handed out in first-seen order and each new
        key is appended to `self.parts`.
        """
        try:
            key = self.pull(row[self.start].get('key'))
            if self.computed_domain:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME WE SEE THIS KEY: GIVE IT THE NEXT FREE POSITION
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reads a single column of the row."""
        return 1
예제 #2
0
    def append_query(self, es_query, start):
        """
        Remember `start` and return the wrapped aggregation that nests
        `es_query` under `_match` (values present) and `_missing` (values
        absent) for this edge.
        """
        self.start = start

        edge_value = self.edge.value
        script = edge_value.partial_eval().to_painless(self.schema)
        exists = NotOp("not", script.miss).partial_eval()

        if not isinstance(edge_value, Variable):
            # EXPRESSION: FILTER TO DOCUMENTS WITH A VALUE, THEN BUCKET BY SCRIPT
            match = {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": set_default(
                        {"terms": {
                            "script": {
                                "lang": "painless",
                                "inline": script.expr
                            },
                            "size": self.domain.limit,
                            "order": {"_term": self.sorted} if self.sorted else None
                        }},
                        es_query
                    )
                }
            }
        else:
            # PLAIN VARIABLE: BUCKET ON THE FIRST LEAF COLUMN
            var_name = edge_value.var
            directions = [
                s.sort
                for s in self.query.sort
                if s.value.var == var_name
            ]
            terms = {
                "field": self.schema.leaves(var_name)[0].es_column,
                "size": self.domain.limit
            }
            if directions:
                # ONLY ADD "order" WHEN THE QUERY SORTS ON THIS SAME VARIABLE
                terms["order"] = {"_term": "asc" if directions[0] == 1 else "desc"}
            match = set_default({"terms": terms}, es_query)

        missing = set_default(
            {"filter": NotOp("not", exists).to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {"_match": match, "_missing": missing}})
예제 #3
0
파일: decoders.py 프로젝트: rv404674/TUID
class DefaultDecoder(SetDecoder):
    """
    Decoder for the `default` domain type, where the set of values is not
    known when the query is written and is collected from the response rows.
    """

    def __init__(self, edge, query, limit):
        """
        Cap the domain limit, compile the edge value to an ES script, and
        work out whether the terms should be ordered by term.
        """
        AggsDecoder.__init__(self, edge, query, limit)
        domain = edge.domain
        self.domain = domain
        # FIRST AVAILABLE OF: DOMAIN LIMIT, QUERY LIMIT, 10; NEVER ABOVE MAX_LIMIT
        requested_limit = coalesce(domain.limit, query.limit, 10)
        domain.limit = Math.min(requested_limit, MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = script
        self.pull = pull_functions[script.data_type]
        self.missing = script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        matched = [s for s in self.query.sort if s.value == self.edge.value]
        if matched:
            term_direction = {1: "asc", -1: "desc"}
            self.es_order = {"_term": term_direction[matched[0].sort]}
        else:
            self.es_order = None

    def append_query(self, es_query, start):
        """
        Remember `start` (the row position this decoder reads from) and
        return the wrapped aggregation that nests `es_query` under `_match`
        (and under `_missing`, unless every document has a value).
        """
        self.start = start
        edge_value = self.edge.value

        if isinstance(edge_value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE FIRST LEAF COLUMN
            field_terms = {
                "field": self.schema.leaves(edge_value.var)[0].es_column,
                "size": self.domain.limit,
                "order": self.es_order
            }
            return wrap({"aggs": {
                "_match": set_default({"terms": field_terms}, es_query),
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }})

        def script_bucket():
            # TERMS AGGREGATION OVER THE PAINLESS SCRIPT, WITH es_query AS DEFAULTS
            script_terms = {
                "script": {"lang": "painless", "inline": self.script.expr},
                "size": self.domain.limit,
                "order": self.es_order
            }
            return set_default({"terms": script_terms}, es_query)

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            return wrap({"aggs": {"_match": script_bucket()}})

        return wrap({"aggs": {
            # _match AND _filter REVERSED SO _match LINES UP WITH _missing
            "_match": {
                "filter": self.exists.to_esfilter(self.schema),
                "aggs": {"_filter": script_bucket()}
            },
            "_missing": set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
        }})

    def count(self, row):
        """
        Record the key of a non-empty bucket; a non-empty bucket without a
        key switches on `allowNulls` for the edge instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` (NOT `is not None`) IS KEPT ON PURPOSE: THE TWO DIFFER FOR
        # OBJECTS THAT DEFINE THEIR OWN EQUALITY
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a
        SimpleSetDomain built from the sorted, de-duplicated keys seen by
        `count`.
        """
        partitions = jx.sort(set(self.parts))
        self.edge.domain = self.domain = SimpleSetDomain(
            partitions=partitions
        )
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for the key found in `row`.

        Once the domain has been computed the index comes from the domain;
        before that, indexes are handed out in first-seen order and each new
        key is appended to `self.parts`.
        """
        try:
            key = self.pull(row[self.start].get('key'))
            if self.computed_domain:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME WE SEE THIS KEY: GIVE IT THE NEXT FREE POSITION
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reads a single column of the row."""
        return 1