def es_bulksetop(esq, frum, query):
    """
    Run a set-op query against ES in bulk mode: a background thread writes
    the result to a file, and the data/status URLs are returned immediately.
    """
    limit = MIN([query.limit, MAX_DOCUMENTS])
    job_id = randoms.base64(32, extra="-_")
    schema = frum.schema

    # TRANSLATE THE JX QUERY INTO AN ES REQUEST
    all_paths, split_decoders, var_to_columns = pre_process(query)
    new_select, split_select, flatten = get_selects(query)
    op, split_wheres = setop_to_es_queries(query, all_paths, split_select, var_to_columns)
    es_query = es_query_proto(split_select, op, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]
    formatter = formatters[query.format](limit, new_select, query)

    # EXTRACTION RUNS ON ITS OWN THREAD; CALLER POLLS THE STATUS URL
    Thread.run(
        "Download " + job_id,
        extractor,
        job_id,
        limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    return to_data({
        "url": URL_PREFIX / (job_id + ".json"),
        "status": URL_PREFIX / (job_id + ".status.json"),
        "meta": {"format": query.format, "es_query": es_query, "limit": limit},
    })
def __div__(self, amount):
    """
    Divide this Duration by `amount`.

    * Duration with a month component -> count of such periods (float)
    * plain number                    -> a scaled Duration
    * month-free Duration             -> ratio of millisecond values
    """
    if isinstance(amount, Duration) and amount.month:
        months = self.month
        millis = self.milli
        # DO NOT CONSIDER TIME OF DAY
        time_of_day = millis % MILLI_VALUES.day
        millis = millis - time_of_day
        if months == 0 and millis > (MILLI_VALUES.year / 3):
            # LARGE MONTH-FREE DURATION: APPROXIMATE A MONTH COUNT
            months = floor(12 * self.milli / MILLI_VALUES.year)
            millis -= (months / 12) * MILLI_VALUES.year
        else:
            millis = millis - (self.month * MILLI_VALUES.month)
            if millis >= MILLI_VALUES.day * 31:
                from mo_logs import Log
                Log.error("Do not know how to handle")
        # FRACTIONAL MONTH, CAPPED JUST UNDER A FULL MONTH
        millis = MIN([29 / 30, (millis + time_of_day) / (MILLI_VALUES.day * 30)])
        return floor(months / amount.month) + millis
    elif is_number(amount):
        result = Duration(0)
        result.milli = self.milli / amount
        result.month = self.month / amount
        return result
    else:
        return self.milli / amount.milli
def _select(template, data, fields, depth):
    """
    Apply the `fields` selectors to every dict in `data`, starting each
    record from a copy of `template`; fields that reach into nested lists
    are collected and handled by recursing one level deeper.
    """
    output = FlatList()
    deep_path = []
    deep_fields = UniqueIndex(["name"])
    for row in data:
        if row.__class__ is Data:
            Log.error(
                "programmer error, _select can not handle Data, only dict")
        record = template.copy()
        children = None
        for field in fields:
            cut, sub = _select_deep(row, field, depth, record)
            if children is None:
                children = sub
            if cut:
                branch = field.value[0:cut:]
                # KEEP TRACK OF WHICH FIELDS NEED DEEPER SELECT
                if not deep_fields[field]:
                    deep_fields.add(field)
                common = MIN([len(deep_path), len(branch)])
                if branch[:common:] != deep_path[:common:]:
                    Log.error(
                        "Dangerous to select into more than one branch at time"
                    )
                if len(deep_path) < len(branch):
                    deep_path = branch
        if not children:
            output.append(record)
        else:
            output.extend(_select(record, children, deep_fields, depth + 1))
    return output
def __init__(self, **desc):
    """
    A domain of numeric ranges: either explicit `partitions` are given,
    or uniform partitions are built from (`min`, `max`, `interval`).
    """
    Domain.__init__(self, **desc)
    self.type = "range"
    self.NULL = Null

    if self.partitions:
        # EXPLICIT PARTITIONS GIVEN: IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        parts = listwrap(self.partitions)
        for i, part in enumerate(parts):
            self.min = MIN([self.min, part.min])
            self.max = MAX([self.max, part.max])
            # NOTE: `== None` (not `is None`) is deliberate; parts may hold Null
            if part.dataIndex != None and part.dataIndex != i:
                Log.error("Expecting `dataIndex` to agree with the order of the parts")
            if part[self.key] == None:
                Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
            part.dataIndex = i
        # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
        for part, other in itertools.product(parts, parts):
            if part.min <= other.min and other.min < part.max and unwrap(part) is not unwrap(other):
                Log.error("partitions overlap!")
        self.partitions = wrap(parts)
        return
    elif any([self.min == None, self.max == None, self.interval == None]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(frange(self.min, self.max, self.interval))
    ])
def _range_composer(edge, domain, es_query, to_float, schema):
    # USE RANGES
    lo = coalesce(domain.min, MIN(domain.partitions.min))
    hi = coalesce(domain.max, MAX(domain.partitions.max))

    if edge.allowNulls:
        # DOCUMENTS OUTSIDE [lo, hi), OR MISSING THE VALUE, GO TO "_missing"
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lo))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(hi))]),
        ])
        missing_filter = set_default(
            {"filter": NotOp("not", in_range.partial_eval()).to_esfilter(schema)},
            es_query
        )
    else:
        missing_filter = None

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_painless(schema).script(schema)}

    ranges = [
        {"from": to_float(p.min), "to": to_float(p.max)}
        for p in domain.partitions
    ]
    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": ranges}},
            es_query
        ),
        "_missing": missing_filter,
    }})
def _range_composer(self, edge, domain, es_query, to_float, schema):
    # USE RANGES
    lo = coalesce(domain.min, MIN(domain.partitions.min))
    hi = coalesce(domain.max, MAX(domain.partitions.max))
    acc = Aggs()

    if edge.allowNulls:
        # DOCUMENTS NOT IN [lo, hi), OR MISSING THE VALUE, LAND IN "_missing"
        in_range = AndOp([
            edge.value.exists(),
            GteOp([edge.value, Literal(to_float(lo))]),
            LtOp([edge.value, Literal(to_float(hi))]),
        ]).partial_eval()
        acc.add(FilterAggs("_missing", NotOp(in_range), self).add(es_query))

    if is_op(edge.value, Variable):
        params = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        params = {"script": text_type(Painless[edge.value].to_es_script(schema))}
    params['ranges'] = [
        {"from": to_float(p.min), "to": to_float(p.max)}
        for p in domain.partitions
    ]
    return acc.add(RangeAggs("_match", params, self).add(es_query))
def es_bulksetop(esq, frum, query):
    """
    Execute a set-op query in bulk mode: a background thread downloads the
    result into a file; return the data/status URLs immediately.
    """
    limit = MIN([query.limit, MAX_DOCUMENTS])
    job_id = Random.base64(32, extra="-_")
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TRANSLATE THE JX QUERY INTO AN ES REQUEST
    new_select, split_select = get_selects(query)
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]
    formatter = formatters[query.format](limit, new_select, query)

    # EXTRACTION RUNS ON ITS OWN THREAD; CALLER POLLS THE STATUS URL
    Thread.run(
        "Download " + job_id,
        extractor,
        job_id,
        limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    return wrap({
        "url": URL_PREFIX / (job_id + ".json"),
        "status": URL_PREFIX / (job_id + ".status.json"),
        "meta": {"format": query.format, "es_query": es_query, "limit": limit},
    })
def partial_eval(self):
    """
    Fold literal terms into a single value; keep the rest symbolic.
    """
    acc = None  # AGGREGATE OF THE LITERAL TERMS SEEN SO FAR
    symbolic = []
    for term in self.terms:
        simple = term.partial_eval()
        if simple is NULL:
            continue
        if is_op(simple, Literal):
            # NOTE(review): literals are folded with MIN, which looks copied
            # from MinOp.partial_eval — confirm this is intended for a union
            acc = MIN([acc, simple.value])
        else:
            symbolic.append(simple)
    if not symbolic:
        if acc == None:
            return NULL
        return Literal(acc)
    if acc == None:
        return self.lang[UnionOp(symbolic)]
    return self.lang[UnionOp([Literal(acc)] + symbolic)]
def partial_eval(self, lang):
    """
    Fold literal terms into one minimum; keep symbolic terms for runtime.
    """
    acc = None  # MINIMUM OVER THE LITERAL TERMS SEEN SO FAR
    symbolic = []
    for term in self.terms:
        simple = term.partial_eval(lang)
        if is_op(simple, NullOp):
            continue
        if is_literal(simple):
            acc = MIN([acc, simple.value])
        else:
            symbolic.append(simple)
    if not symbolic:
        if acc == None:
            return NULL
        return Literal(acc)
    if acc == None:
        return MinOp(symbolic)
    return MinOp([Literal(acc)] + symbolic)
def _range_composer(self, edge, domain, es_query, to_float, schema):
    # USE RANGES
    lo = coalesce(domain.min, MIN(domain.partitions.min))
    hi = coalesce(domain.max, MAX(domain.partitions.max))
    acc = Aggs()

    if edge.allowNulls:
        # VALUES OUTSIDE [lo, hi), OR ABSENT, ARE COUNTED UNDER "_missing"
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lo))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(hi))]),
        ]).partial_eval()
        acc.add(FilterAggs("_missing", NotOp("not", in_range), self).add(es_query))

    if isinstance(edge.value, Variable):
        params = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        params = {"script": edge.value.to_es_script(schema).script(schema)}
    params['ranges'] = [
        {"from": to_float(p.min), "to": to_float(p.max)}
        for p in domain.partitions
    ]
    return acc.add(RangeAggs("_match", params, self).add(es_query))
def select(self, fields):
    """
    Select columns out of this structure.

    `fields` may be a dot-delimited string (returns a plain list of values)
    or a list of {"name", "value"} selectors (returns a FlatList of Data).
    """
    if isinstance(fields, Mapping):
        fields = fields.value

    if isinstance(fields, text_type):
        # RETURN LIST OF VALUES
        if len(split_field(fields)) == 1:
            if self.path[0] == fields:
                return [pair[1] for pair in self.data]
            else:
                return [pair[0][fields] for pair in self.data]
        else:
            keys = split_field(fields)
            # LENGTH OF COMMON PREFIX WITH self.path
            depth = coalesce(
                MIN([
                    i
                    for i, (k, p) in enumerate(zip(keys, self.path))
                    if k != p
                ]),
                len(self.path))
            short_key = keys[depth:]
            output = FlatList()
            _select1((wrap(pair[depth]) for pair in self.data), short_key, 0, output)
            return output

    if isinstance(fields, list):
        # PRE-COMPUTE ONE ACCESSOR PER SELECTOR, THEN APPLY TO EVERY ROW
        accessors = []
        for field in fields:
            if hasattr(field.value, "__call__"):
                accessors.append((field.name, field.value))
            else:
                # BIND field.value NOW TO AVOID LATE-BINDING CLOSURE BUGS
                accessors.append(
                    (field.name, functools.partial(lambda v, d: d[v], field.value)))
        output = FlatList()
        for row in self._values():
            acc = Data()
            for name, get in accessors:
                acc[name] = get(row)
            output.append(acc)
        return output

    Log.error("multiselect over FlatList not supported")
def end(self):
    """Return MIN over `self.total`.

    NOTE(review): `total` is not visible in this chunk — presumably a
    collection of candidate end values; confirm against the class definition.
    """
    return MIN(self.total)