class DefaultDecoder(SetDecoder):
    """
    FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    The set of parts is not known when the query is built; it is collected
    from the aggregation rows, either up front (count/done_count) or
    incrementally (get_index).
    """

    def __init__(self, edge, query, limit):
        """
        :param edge: THE EDGE BEING DECODED; ITS domain AND value ARE USED
        :param query: THE QUERY; ITS limit AND sort ARE USED
        :param limit: PASSED THROUGH TO AggsDecoder.__init__
        """
        # AggsDecoder.__init__ IS CALLED DIRECTLY (NOT SetDecoder.__init__)
        AggsDecoder.__init__(self, edge, query, limit)

        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), CAPPED WITH MAX_LIMIT
        self.domain = edge.domain
        requested_limit = coalesce(self.domain.limit, query.limit, 10)
        self.domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # PARTS ARE DISCOVERED WHILE READING THE RESULT
        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        # ES SCRIPT FOR THE EDGE VALUE, PLUS ITS missing/exists EXPRESSIONS
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        term_direction = {1: "asc", -1: "desc"}
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        self.es_order = (
            {"_term": term_direction[matching_sorts[0].sort]}
            if matching_sorts
            else None
        )

    def append_query(self, es_query, start):
        """
        BUILD THE AGGREGATION FOR THIS EDGE AROUND es_query

        :param es_query: GIVEN AS SECOND ARGUMENT TO EVERY set_default CALL
        :param start: REMEMBERED AS self.start; USED LATER TO INDEX INTO ROWS
        :return: wrap()ED {"aggs": ...} WITH A "_match" AGGREGATION AND,
                 EXCEPT WHEN self.exists is TRUE, A "_missing" AGGREGATION
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE FIRST LEAF COLUMN
            es_column = self.schema.leaves(self.edge.value.var)[0].es_column
            matched = set_default(
                {"terms": {
                    "field": es_column,
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            missed = set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
            return wrap({"aggs": {"_match": matched, "_missing": missed}})

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            matched = set_default(
                {"terms": {
                    "script": {"lang": "painless", "inline": self.script.expr},
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            return wrap({"aggs": {"_match": matched}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        scripted_terms = set_default(
            {"terms": {
                "script": {"lang": "painless", "inline": self.script.expr},
                "size": self.domain.limit,
                "order": self.es_order
            }},
            es_query
        )
        missed = set_default(
            {"filter": self.missing.to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {
            "_match": {
                "filter": exists_filter,
                "aggs": {"_filter": scripted_terms}
            },
            "_missing": missed
        }})

    def count(self, row):
        """
        RECORD THE KEY FOUND AT row[self.start], IF THAT PART HAS DOCUMENTS

        A PART WITH DOCUMENTS BUT NO KEY MARKS THE EDGE AS ALLOWING NULLS
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # NOTE(review): EQUALITY (NOT IDENTITY) TEST AGAINST None IS KEPT;
        # PRESUMABLY THE KEY CAN BE A NULL-LIKE OBJECT THAT EQUALS None - CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        REPLACE THE DOMAIN (ON BOTH THE EDGE AND THIS DECODER) WITH A
        SimpleSetDomain OF THE DISTINCT, SORTED KEYS SEEN BY count()
        """
        distinct_keys = set(self.parts)
        new_domain = SimpleSetDomain(partitions=jx.sort(distinct_keys))
        self.edge.domain = new_domain
        self.domain = new_domain
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        :param row: INDEXED WITH self.start TO FIND THE PART FOR THIS EDGE
        :return: INDEX OF THE PART'S KEY; LOOKED UP IN THE COMPUTED DOMAIN,
                 OR ASSIGNED IN ORDER OF FIRST APPEARANCE WHEN THE DOMAIN
                 HAS NOT BEEN COMPUTED
        """
        domain_is_ready = self.computed_domain
        try:
            key = self.pull(row[self.start].get('key'))
            if domain_is_ready:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME THIS KEY IS SEEN: GIVE IT THE NEXT INDEX
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """THIS DECODER ALWAYS REPORTS ONE COLUMN"""
        return 1
def append_query(self, es_query, start):
    """
    BUILD THE AGGREGATION FOR THIS EDGE AROUND es_query

    :param es_query: GIVEN AS SECOND ARGUMENT TO EVERY set_default CALL
    :param start: REMEMBERED AS self.start
    :return: wrap()ED {"aggs": {"_match": ..., "_missing": ...}}
    """
    self.start = start

    script = self.edge.value.partial_eval().to_painless(self.schema)
    exists = NotOp("not", script.miss).partial_eval()

    if isinstance(self.edge.value, Variable):
        # PLAIN VARIABLE: TERMS AGGREGATION ON THE FIRST LEAF COLUMN
        edge_var = self.edge.value.var
        directions = [s.sort for s in self.query.sort if s.value.var == edge_var]
        terms = {
            "field": self.schema.leaves(edge_var)[0].es_column,
            "size": self.domain.limit
        }
        if directions:
            # THE QUERY SORTS ON THIS VARIABLE: ORDER THE TERMS THE SAME WAY
            terms["order"] = {"_term": "asc" if directions[0] == 1 else "desc"}
        matched = set_default({"terms": terms}, es_query)
    else:
        # ANY OTHER EXPRESSION: FILTER ON exists, THEN SCRIPTED TERMS INSIDE
        exists_filter = exists.to_esfilter(self.schema)
        scripted_terms = set_default(
            {"terms": {
                "script": {"lang": "painless", "inline": script.expr},
                "size": self.domain.limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }},
            es_query
        )
        matched = {
            "filter": exists_filter,
            "aggs": {"_filter": scripted_terms}
        }

    # EVERY VARIANT GETS THE SAME _missing AGGREGATION: THE NEGATION OF exists
    missed = set_default(
        {"filter": NotOp("not", exists).to_esfilter(self.schema)},
        es_query
    )
    return wrap({"aggs": {"_match": matched, "_missing": missed}})
class DefaultDecoder(SetDecoder):
    """
    FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    The set of parts is not known when the query is built; it is collected
    from the aggregation rows, either up front (count/done_count) or
    incrementally (get_index).
    """

    def __init__(self, edge, query, limit):
        """
        :param edge: THE EDGE BEING DECODED; ITS domain AND value ARE USED
        :param query: THE QUERY; ITS limit AND sort ARE USED
        :param limit: PASSED THROUGH TO AggsDecoder.__init__
        """
        # AggsDecoder.__init__ IS CALLED DIRECTLY (NOT SetDecoder.__init__)
        AggsDecoder.__init__(self, edge, query, limit)

        # LIMIT IS coalesce(DOMAIN LIMIT, QUERY LIMIT, 10), CAPPED WITH MAX_LIMIT
        self.domain = edge.domain
        requested_limit = coalesce(self.domain.limit, query.limit, 10)
        self.domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # PARTS ARE DISCOVERED WHILE READING THE RESULT
        self.parts = []
        self.key2index = {}
        self.computed_domain = False

        # ES SCRIPT FOR THE EDGE VALUE, PLUS ITS missing/exists EXPRESSIONS
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        term_direction = {1: "asc", -1: "desc"}
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        self.es_order = (
            {"_term": term_direction[matching_sorts[0].sort]}
            if matching_sorts
            else None
        )

    def append_query(self, es_query, start):
        """
        BUILD THE AGGREGATION FOR THIS EDGE AROUND es_query

        :param es_query: GIVEN AS SECOND ARGUMENT TO EVERY set_default CALL
        :param start: REMEMBERED AS self.start; USED LATER TO INDEX INTO ROWS
        :return: wrap()ED {"aggs": ...} WITH A "_match" AGGREGATION AND,
                 EXCEPT WHEN self.exists is TRUE, A "_missing" AGGREGATION
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE FIRST LEAF COLUMN
            es_column = self.schema.leaves(self.edge.value.var)[0].es_column
            matched = set_default(
                {"terms": {
                    "field": es_column,
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            missed = set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
            return wrap({"aggs": {"_match": matched, "_missing": missed}})

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            matched = set_default(
                {"terms": {
                    "script": {"lang": "painless", "inline": self.script.expr},
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            return wrap({"aggs": {"_match": matched}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        scripted_terms = set_default(
            {"terms": {
                "script": {"lang": "painless", "inline": self.script.expr},
                "size": self.domain.limit,
                "order": self.es_order
            }},
            es_query
        )
        missed = set_default(
            {"filter": self.missing.to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {
            "_match": {
                "filter": exists_filter,
                "aggs": {"_filter": scripted_terms}
            },
            "_missing": missed
        }})

    def count(self, row):
        """
        RECORD THE KEY FOUND AT row[self.start], IF THAT PART HAS DOCUMENTS

        A PART WITH DOCUMENTS BUT NO KEY MARKS THE EDGE AS ALLOWING NULLS
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # NOTE(review): EQUALITY (NOT IDENTITY) TEST AGAINST None IS KEPT;
        # PRESUMABLY THE KEY CAN BE A NULL-LIKE OBJECT THAT EQUALS None - CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        REPLACE THE DOMAIN (ON BOTH THE EDGE AND THIS DECODER) WITH A
        SimpleSetDomain OF THE DISTINCT, SORTED KEYS SEEN BY count()
        """
        distinct_keys = set(self.parts)
        new_domain = SimpleSetDomain(partitions=jx.sort(distinct_keys))
        self.edge.domain = new_domain
        self.domain = new_domain
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        :param row: INDEXED WITH self.start TO FIND THE PART FOR THIS EDGE
        :return: INDEX OF THE PART'S KEY; LOOKED UP IN THE COMPUTED DOMAIN,
                 OR ASSIGNED IN ORDER OF FIRST APPEARANCE WHEN THE DOMAIN
                 HAS NOT BEEN COMPUTED
        """
        domain_is_ready = self.computed_domain
        try:
            key = self.pull(row[self.start].get('key'))
            if domain_is_ready:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME THIS KEY IS SEEN: GIVE IT THE NEXT INDEX
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """THIS DECODER ALWAYS REPORTS ONE COLUMN"""
        return 1