コード例 #1
0
ファイル: set_bulk.py プロジェクト: klahnakoski/pyLibrary
def es_bulksetop(esq, frum, query):
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = randoms.base64(32, extra="-_")

    schema = frum.schema
    all_paths, split_decoders, var_to_columns = pre_process(query)
    new_select, split_select, flatten = get_selects(query)
    op, split_wheres = setop_to_es_queries(query, all_paths, split_select, var_to_columns)
    es_query = es_query_proto(split_select, op, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = to_data(
        {
            "url": URL_PREFIX / (guid + ".json"),
            "status": URL_PREFIX / (guid + ".status.json"),
            "meta": {"format": query.format, "es_query": es_query, "limit": abs_limit},
        }
    )
    return output
コード例 #2
0
    def __div__(self, amount):
        if isinstance(amount, Duration) and amount.month:
            m = self.month
            r = self.milli

            # DO NOT CONSIDER TIME OF DAY
            tod = r % MILLI_VALUES.day
            r = r - tod

            if m == 0 and r > (MILLI_VALUES.year / 3):
                m = floor(12 * self.milli / MILLI_VALUES.year)
                r -= (m / 12) * MILLI_VALUES.year
            else:
                r = r - (self.month * MILLI_VALUES.month)
                if r >= MILLI_VALUES.day * 31:
                    from mo_logs import Log
                    Log.error("Do not know how to handle")
            r = MIN([29 / 30, (r + tod) / (MILLI_VALUES.day * 30)])

            output = floor(m / amount.month) + r
            return output
        elif is_number(amount):
            output = Duration(0)
            output.milli = self.milli / amount
            output.month = self.month / amount
            return output
        else:
            return self.milli / amount.milli
コード例 #3
0
def _select(template, data, fields, depth):
    output = FlatList()
    deep_path = []
    deep_fields = UniqueIndex(["name"])
    for d in data:
        if d.__class__ is Data:
            Log.error(
                "programmer error, _select can not handle Data, only dict")

        record = template.copy()
        children = None
        for f in fields:
            index, c = _select_deep(d, f, depth, record)
            children = c if children is None else children
            if index:
                path = f.value[0:index:]
                if not deep_fields[f]:
                    deep_fields.add(
                        f)  # KEEP TRACK OF WHICH FIELDS NEED DEEPER SELECT
                short = MIN([len(deep_path), len(path)])
                if path[:short:] != deep_path[:short:]:
                    Log.error(
                        "Dangerous to select into more than one branch at time"
                    )
                if len(deep_path) < len(path):
                    deep_path = path
        if not children:
            output.append(record)
        else:
            output.extend(_select(record, children, deep_fields, depth + 1))

    return output
コード例 #4
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts =listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = MIN([self.min, p.min])
                self.max = MAX([self.max, p.max])
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            for p, q in itertools.product(parts, parts):
                if p.min <= q.min and q.min < p.max and unwrap(p) is not unwrap(q):
                    Log.error("partitions overlap!")

            self.partitions = wrap(parts)
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
コード例 #5
0
ファイル: decoders.py プロジェクト: nknick99/MySQL-to-S3
def _range_composer(edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if edge.allowNulls:
        missing_filter = set_default(
            {
                "filter": NotOp("not", AndOp("and", [
                    edge.value.exists(),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
                ]).partial_eval()).to_esfilter(schema)
            },
            es_query
        )
    else:
        missing_filter = None

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_painless(schema).script(schema)}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})
コード例 #6
0
ファイル: decoders.py プロジェクト: mars-f/ActiveData
def _range_composer(self, edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    output = Aggs()
    if edge.allowNulls:
        output.add(
            FilterAggs(
                "_missing",
                NotOp(
                    AndOp([
                        edge.value.exists(),
                        GteOp([edge.value, Literal(to_float(_min))]),
                        LtOp([edge.value, Literal(to_float(_max))])
                    ]).partial_eval()), self).add(es_query))

    if is_op(edge.value, Variable):
        calc = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        calc = {"script": text_type(Painless[edge.value].to_es_script(schema))}
    calc['ranges'] = [{
        "from": to_float(p.min),
        "to": to_float(p.max)
    } for p in domain.partitions]

    return output.add(RangeAggs("_match", calc, self).add(es_query))
コード例 #7
0
ファイル: set_bulk.py プロジェクト: klahnakoski/ActiveData
def es_bulksetop(esq, frum, query):
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = Random.base64(32, extra="-_")

    schema = query.frum.schema
    query_path = schema.query_path[0]
    new_select, split_select = get_selects(query)
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = wrap({
        "url": URL_PREFIX / (guid + ".json"),
        "status": URL_PREFIX / (guid + ".status.json"),
        "meta": {
            "format": query.format,
            "es_query": es_query,
            "limit": abs_limit
        },
    })
    return output
コード例 #8
0
    def partial_eval(self):
        minimum = None
        terms = []
        for t in self.terms:
            simple = t.partial_eval()
            if simple is NULL:
                pass
            elif is_op(simple, Literal):
                minimum = MIN([minimum, simple.value])
            else:
                terms.append(simple)
        if len(terms) == 0:
            if minimum == None:
                return NULL
            else:
                return Literal(minimum)
        else:
            if minimum == None:
                output = self.lang[UnionOp(terms)]
            else:
                output = self.lang[UnionOp([Literal(minimum)] + terms)]

        return output
コード例 #9
0
ファイル: min_op.py プロジェクト: klahnakoski/jx-python
    def partial_eval(self, lang):
        minimum = None
        terms = []
        for t in self.terms:
            simple = t.partial_eval(lang)
            if is_op(simple, NullOp):
                pass
            elif is_literal(simple):
                minimum = MIN([minimum, simple.value])
            else:
                terms.append(simple)
        if len(terms) == 0:
            if minimum == None:
                return NULL
            else:
                return Literal(minimum)
        else:
            if minimum == None:
                output = (MinOp(terms))
            else:
                output = (MinOp([Literal(minimum)] + terms))

        return output
コード例 #10
0
def _range_composer(self, edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    output = Aggs()
    if edge.allowNulls:
        output.add(FilterAggs(
            "_missing",
            NotOp("not", AndOp("and", [
                edge.value.exists(),
                InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
                InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
            ]).partial_eval()),
            self
        ).add(es_query))

    if isinstance(edge.value, Variable):
        calc = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        calc = {"script": edge.value.to_es_script(schema).script(schema)}
    calc['ranges'] = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]

    return output.add(RangeAggs("_match", calc, self).add(es_query))
コード例 #11
0
    def select(self, fields):
        if isinstance(fields, Mapping):
            fields = fields.value

        if isinstance(fields, text_type):
            # RETURN LIST OF VALUES
            if len(split_field(fields)) == 1:
                if self.path[0] == fields:
                    return [d[1] for d in self.data]
                else:
                    return [d[0][fields] for d in self.data]
            else:
                keys = split_field(fields)
                depth = coalesce(
                    MIN([
                        i for i, (k, p) in enumerate(zip(keys, self.path))
                        if k != p
                    ]), len(self.path))  # LENGTH OF COMMON PREFIX
                short_key = keys[depth:]

                output = FlatList()
                _select1((wrap(d[depth]) for d in self.data), short_key, 0,
                         output)
                return output

        if isinstance(fields, list):
            output = FlatList()

            meta = []
            for f in fields:
                if hasattr(f.value, "__call__"):
                    meta.append((f.name, f.value))
                else:
                    meta.append(
                        (f.name, functools.partial(lambda v, d: d[v],
                                                   f.value)))

            for row in self._values():
                agg = Data()
                for name, f in meta:
                    agg[name] = f(row)

                output.append(agg)

            return output

            # meta = []
            # for f in fields:
            #     keys = split_field(f.value)
            #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            #     short_key = join_field(keys[depth:])
            #
            #     meta.append((f.name, depth, short_key))
            #
            # for row in self._data:
            #     agg = Data()
            #     for name, depth, short_key in meta:
            #         if short_key:
            #             agg[name] = row[depth][short_key]
            #         else:
            #             agg[name] = row[depth]
            #     output.append(agg)
            # return output

        Log.error("multiselect over FlatList not supported")
コード例 #12
0
 def end(self):
     return MIN(self.total)