Example #1
0
def median(values, simple=True, mean_weight=0.0):
    """
    RETURN MEDIAN VALUE

    IF simple=False THEN IN THE EVENT MULTIPLE INSTANCES OF THE
    MEDIAN VALUE, THE MEDIAN IS INTERPOLATED BASED ON ITS POSITION
    IN THE MEDIAN RANGE

    mean_weight IS TO PICK A MEDIAN VALUE IN THE ODD CASE THAT IS
    CLOSER TO THE MEAN (PICK A MEDIAN BETWEEN TWO MODES IN BIMODAL CASE)
    """

    if OR(v == None for v in values):
        Log.error("median is not ready to handle None")

    try:
        if not values:
            return Null

        l = len(values)
        _sorted = sorted(values)

        middle = int(l / 2)
        _median = float(_sorted[middle])

        if len(_sorted) == 1:
            return _median

        if simple:
            if l % 2 == 0:
                return (_sorted[middle - 1] + _median) / 2
            return _median

        # FIND RANGE OF THE median
        start_index = middle - 1
        while start_index > 0 and _sorted[start_index] == _median:
            start_index -= 1
        start_index += 1
        stop_index = middle + 1
        while stop_index < l and _sorted[stop_index] == _median:
            stop_index += 1

        num_middle = stop_index - start_index

        if l % 2 == 0:
            if num_middle == 1:
                return (_sorted[middle - 1] + _median) / 2
            else:
                return (_median - 0.5) + (middle - start_index) / num_middle
        else:
            if num_middle == 1:
                return (1 - mean_weight) * _median + mean_weight * (
                    _sorted[middle - 1] + _sorted[middle + 1]) / 2
            else:
                return (_median -
                        0.5) + (middle + 0.5 - start_index) / num_middle
    except Exception as e:
        Log.error("problem with median of {{values}}", values=values, cause=e)
Example #2
0
 def _select(self, select):
     selects = listwrap(select)
     is_aggregate = OR(s.aggregate != None and s.aggregate != "none" for s in selects)
     if is_aggregate:
         values = {s.name: Matrix(value=self.data[s.value].aggregate(s.aggregate)) for s in selects}
         return Cube(select, [], values)
     else:
         values = {s.name: self.data[s.value] for s in selects}
         return Cube(select, self.edges, values)
Example #3
0
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data
        """

        self.is_value = False if isinstance(select, list) else True
        self.select = select
        self.meta = Data(format="cube")       # PUT EXTRA MARKUP HERE
        self.is_none = False

        if not all(data.values()):
            is_none = True

        # ENSURE frum IS PROPER FORM
        if isinstance(select, list):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = FlatList.EMPTY
            elif isinstance(data, Mapping):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = FlatList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.wrap(data)}
                self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}])
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = FlatList.EMPTY
        else:
            self.edges = wrap(edges)

        self.data = data
Example #4
0
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                items = sort_using_key(list(value.items()), lambda r: r[0])
                values = [
                    encode_basestring(k) + PRETTY_COLON +
                    indent(pretty_json(v)).strip() for k, v in items
                    if v != None
                ]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + INDENT + (",\n" +
                                             INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not isinstance(k, text_type) for k in value.keys()):
                    Log.error("JSON must have string keys: {{keys}}:",
                              keys=[k for k in value.keys()],
                              cause=e)

                Log.error("problem making dict pretty: keys={{keys}}:",
                          keys=[k for k in value.keys()],
                          cause=e)
        elif value in (None, Null):
            return "null"
        elif isinstance(value, (text_type, binary_type)):
            if isinstance(value, binary_type):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",
                             length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not even explicit convert {{type}}",
                                type=f.__class__.__name__,
                                cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join(
                    [indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(
                    *[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(
                            floor((ARRAY_ROW_LENGTH + 2.0) /
                                  float(max_len +
                                        2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join(
                        [indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(
                        j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns))
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p))
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)

    except Exception as e:
        problem_serializing(value, e)
Example #5
0
def _normalize(esfilter):
    """
    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter.bool.must:
            terms = esfilter.bool.must
            for (i0, t0), (i1,
                           t1) in itertools.product(enumerate(terms),
                                                    enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items(
                    )[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    return MATCH_NONE
                if a.bool.must:
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.bool.must)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"and": output})
            continue

        if esfilter.bool.should:
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return MATCH_ALL

        if esfilter.terms:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            return {
                                "or": [{
                                    "missing": {
                                        "field": k
                                    }
                                }, {
                                    "terms": {
                                        k: rest
                                    }
                                }],
                                "isNormal":
                                True
                            }
                        else:
                            return {"missing": {"field": k}, "isNormal": True}
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return MATCH_NONE

        if esfilter['not']:
            _sub = esfilter['not']
            sub = _normalize(_sub)
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
Example #6
0
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                if not value:
                    return "{}"
                items = list(value.items())
                if len(items) == 1:
                    return "{" + unicode_key(items[0][0]) + ": " + pretty_json(
                        items[0][1]).strip() + "}"

                items = sorted(items, lambda a, b: value_compare(a[0], b[0]))
                values = [
                    unicode_key(k) + ": " + indent(pretty_json(v)).strip()
                    for k, v in items if v != None
                ]
                return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not isinstance(k, basestring) for k in value.keys()):
                    Log.error("JSON must have string keys: {{keys}}:",
                              keys=[k for k in value.keys()],
                              cause=e)

                Log.error("problem making dict pretty: keys={{keys}}:",
                          keys=[k for k in value.keys()],
                          cause=e)
        elif value in (None, Null):
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",
                             length=len(output))
                    return output
                except BaseException, f:
                    Log.warning("can not even explicit convert {{type}}",
                                type=f.__class__.__name__,
                                cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join(
                    [indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(
                    *[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(
                            floor((ARRAY_ROW_LENGTH + 2.0) /
                                  float(max_len +
                                        2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + ", ".join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join(
                        [indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(", ".join(
                    j.rjust(max_len) for j in js[r:r + num_columns])
                                     for r in xrange(0, len(js), num_columns))
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p))
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
Example #7
0
def es_setop(es, mvel, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(
        query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex:
        if len(select) == 1 and isinstance(select[0].value, LeavesOp):
            FromES = wrap({
                "query": {
                    "bool": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": query.where.to_esfilter()
                    }
                },
                "sort": query.sort,
                "size": 0
            })
        elif all(isinstance(v, Variable) for v in select.value):
            FromES = wrap({
                "query": {
                    "bool": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": query.where.to_esfilter()
                    }
                },
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": jx_expression(query.where).to_esfilter()
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": jx_expression(query.where).to_esfilter()
        }

    data = es_post(es, FromES, query.limit)

    if len(select) == 1 and isinstance(select[0].value, LeavesOp):
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = transpose(*data_list)
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    return Data(meta={"esquery": FromES}, data=cube)