def __init__(self, name, params, cwd=None, env=None, debug=False, shell=False, bufsize=-1):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + strings.quote(name))
        self.stdin = Queue("stdin for process " + strings.quote(name), silent=True)
        self.stdout = Queue("stdout for process " + strings.quote(name), silent=True)
        self.stderr = Queue("stderr for process " + strings.quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=bufsize,
                cwd=cwd if isinstance(cwd, (basestring, NullType, NoneType)) else cwd.abspath,
                env=unwrap(set_default(env, os.environ)),
                shell=shell
            )

            self.please_stop = Signal()
            self.please_stop.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, "stdout", service.stdout, self.stdout, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stderr", self._reader, "stderr", service.stderr, self.stderr, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            ]
        except Exception as e:
            Log.error("Can not call", e)

        if self.debug:
            Log.note("{{process}} START: {{command}}", process=self.name, command=" ".join(map(strings.quote, params)))
Example #2
def quote_column(column_name, table=None):
    if not isinstance(column_name, unicode):
        Log.error("expecting a name")
    if table != None:
        return SQL(quote(table) + "." + quote(column_name))
    else:
        if _no_need_to_quote.match(column_name):
            return SQL(column_name)
        return SQL(quote(column_name))
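A hedged illustration of both branches (identifiers are hypothetical; assumes quote() wraps names in ANSI double quotes):

quote_column(u"revision", table=u"repo")   # -> SQL('"repo"."revision"')
quote_column(u"rev")                       # -> SQL('rev') when _no_need_to_quote matches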
Example #3
 def _convert(v):
     if v is None:
         return NULL.to_es_script(schema)
     if v is True:
         return EsScript(
             type=BOOLEAN,
             expr="true",
             frum=self
         )
     if v is False:
         return EsScript(
             type=BOOLEAN,
             expr="false",
             frum=self
         )
     if isinstance(v, text_type):
         return EsScript(
             type=STRING,
             expr=quote(v),
             frum=self
         )
     if isinstance(v, int):
         return EsScript(
             type=INTEGER,
             expr=text_type(v),
             frum=self
         )
     if isinstance(v, float):
         return EsScript(
             type=NUMBER,
             expr=text_type(v),
             frum=self
         )
     if isinstance(v, dict):
         return EsScript(
             type=OBJECT,
             expr="[" + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items()) + "]",
             frum=self
         )
     if isinstance(v, (list, tuple)):
         return EsScript(
             type=OBJECT,
             expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
             frum=self
         )
     if isinstance(v, Date):
         return EsScript(
             type=NUMBER,
             expr=text_type(v.unix),
             frum=self
         )
Example #4
 def to_es_script(self, schema, not_null=False, boolean=False, many=True):
     if is_op(self.expr, Variable_):
         if self.expr.var == "_id":
             return EsScript(type=BOOLEAN, expr="false", frum=self, schema=schema)
         else:
             columns = schema.leaves(self.expr.var)
             return (
                 AndOp(
                     [
                         EsScript(
                             type=BOOLEAN,
                             expr="doc[" + quote(c.es_column) + "].empty",
                             frum=self,
                             schema=schema,
                         )
                         for c in columns
                     ]
                 )
                 .partial_eval()
                 .to_es_script(schema)
             )
     elif is_literal(self.expr):
         return self.expr.missing().to_es_script(schema)
     else:
         return self.expr.missing().partial_eval().to_es_script(schema)
Example #5
 def to_python(self, not_null=False, boolean=False, many=False):
     return (
         "re.match("
         + quote(json2value(self.pattern.json) + "$")
         + ", "
         + Python[self.var].to_python()
         + ")"
     )
Example #6
def unicode_key(key):
    """
    CONVERT PROPERTY VALUE TO QUOTED NAME OF SAME
    """
    if not isinstance(key, (text, binary_type)):
        from mo_logs import Log
        Log.error("{{key|quote}} is not a valid key", key=key)
    return quote(text(key))
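A short, hedged illustration (keys are hypothetical):

unicode_key("color")   # -> '"color"', the quoted name
unicode_key(3.14)      # -> Log.error: 3.14 is not a valid key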
Example #7
def unicode_key(key):
    """
    CONVERT PROPERTY VALUE TO QUOTED NAME OF SAME
    """
    if not isinstance(key, (text_type, binary_type)):
        from mo_logs import Log
        Log.error("{{key|quote}} is not a valid key", key=key)
    return quote(text_type(key))
Example #8
def to_python(self, not_null=False, boolean=False, many=False):
    return (
        "wrap_leaves({" +
        ','.join(
            quote(t['name']) + ":" + t['value'].to_python() for t in self.terms
        ) +
        "})"
    )
Example #9
 def _convert(v):
     if v is None:
         return NULL.to_ruby(schema)
     if v is True:
         return Ruby(
             type=BOOLEAN,
             expr="true",
             frum=self
         )
     if v is False:
         return Ruby(
             type=BOOLEAN,
             expr="false",
             frum=self
         )
     if isinstance(v, text_type):
         return Ruby(
             type=STRING,
             expr=quote(v),
             frum=self
         )
     if isinstance(v, int):
         return Ruby(
             type=INTEGER,
             expr=text_type(v),
             frum=self
         )
     if isinstance(v, float):
         return Ruby(
             type=NUMBER,
             expr=text_type(v),
             frum=self
         )
     if isinstance(v, dict):
         return Ruby(
             type=OBJECT,
             expr="[" + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items()) + "]",
             frum=self
         )
     if isinstance(v, (list, tuple)):
         return Ruby(
             type=OBJECT,
             expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
             frum=self
         )
Example #10
 def to_python(self, not_null=False, boolean=False, many=False):
     return (
         "wrap_leaves({"
         + ",".join(
             quote(t["name"]) + ":" + Python[t["value"]].to_python()
             for t in self.terms
         )
         + "})"
     )
Example #11
def value2query(value):
    if isinstance(value, datetime):
        return convert.datetime2milli(value)
    if isinstance(value, Duration):
        return value.milli

    if Math.is_number(value):
        return value
    return quote(value)
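Hedged examples of each branch (inputs are illustrative; the datetime result assumes UTC):

value2query(datetime(2020, 1, 1))   # -> 1577836800000 (epoch milliseconds)
value2query(42)                     # -> 42, numbers pass through unquoted
value2query(u"needs quoting")       # -> '"needs quoting"'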
Example #13
def quote_column(*path):
    if not path:
        Log.error("expecting a name")
    if any(not is_text(p) for p in path):
        Log.error("expecting strings, not SQL")
    try:
        return ConcatSQL((SQL_SPACE, JoinSQL(SQL_DOT, [SQL(quote(p)) for p in path]), SQL_SPACE))
    except Exception as e:
        Log.error("Not expacted", cause=e)
Example #14
 def to_python(self, not_null=False, boolean=False, many=False):
     return (
         "leaves_to_data({"
         + ",".join(
             quote(t["name"]) + ":" + Python[t["value"]].to_python()
             for t in self.terms
         )
         + "})"
     )
Example #15
        def _convert(v):
            if v is None:
                return null_script
            if v is True:
                return true_script
            if v is False:
                return false_script
            class_ = v.__class__
            if class_ is text_type:
                return EsScript(type=STRING, expr=quote(v), frum=self, schema=schema)
            if class_ in integer_types:
                if MIN_INT32 <= v <= MAX_INT32:
                    return EsScript(
                        type=INTEGER, expr=text_type(v), frum=self, schema=schema
                    )
                else:
                    return EsScript(
                        type=INTEGER, expr=text_type(v) + "L", frum=self, schema=schema
                    )

            if class_ is float:
                return EsScript(
                    type=NUMBER, expr=text_type(v) + "D", frum=self, schema=schema
                )
            if class_ in data_types:
                return EsScript(
                    type=OBJECT,
                    expr="["
                    + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items())
                    + "]",
                    frum=self,
                    schema=schema,
                )
            if class_ in (FlatList, list, tuple):
                return EsScript(
                    type=OBJECT,
                    expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
                    frum=self,
                    schema=schema,
                )
            if class_ is Date:
                return EsScript(
                    type=NUMBER, expr=text_type(v.unix), frum=self, schema=schema
                )
Example #16
def to_ruby(self, schema, not_null=False, boolean=True):
    if isinstance(self.expr, Variable):
        if self.expr.var == "_id":
            return Ruby(type=BOOLEAN, expr="false", frum=self)
        else:
            columns = schema.leaves(self.expr.var)
            if len(columns) == 1:
                return Ruby(type=BOOLEAN, expr="doc[" + quote(columns[0].es_column) + "].isEmpty()", frum=self)
            else:
                return AndOp("and", [
                    Ruby(
                        type=BOOLEAN,
                        expr="doc[" + quote(c.es_column) + "].isEmpty()",
                        frum=self
                    )
                    for c in columns
                ]).partial_eval().to_ruby(schema)
    else:
        return self.expr.missing().partial_eval().to_ruby(schema)
Example #17
 def to_python(self, not_null=False, boolean=False, many=False):
     return (
         "(("
         + quote(self.substring)
         + " in "
         + Python[self.var].to_python()
         + ") if "
         + Python[self.var].to_python()
         + "!=None else False)"
     )
Example #18
    def append_query(self, es_query, start):
        # TODO: USE "reverse_nested" QUERY TO PULL THESE

        self.start = start
        for i, v in enumerate(self.values):
            es_query = wrap({
                "aggs": {
                    "_match":
                    set_default(
                        {
                            "terms": {
                                "script":
                                'doc[' + quote(self.var) +
                                '].values.contains(' + quote(v) + ') ? 1 : 0'
                            }
                        }, es_query)
                }
            })

        return es_query
Example #19
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script":  expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
Example #20
    def append_query(self, query_path, es_query):
        es_field = first(self.query.frum.schema.leaves(self.var)).es_column

        return Aggs().add(
            TermsAggs(
                "_match", {
                    "script":
                    expand_template(
                        LIST_TO_PIPE,
                        {"expr": 'doc[' + quote(es_field) + '].values'})
                }, self).add(es_query))
Example #22
 def test_unicode1(self):
     value = {
         "comment":
         u"Open all links in the current tab, except the pages opened from external apps — open these ones in new windows"
     }
     test1 = typed_encode(value)
     expected = u'{"comment":{' + quote(
         STRING_KEY
     ) + u':"Open all links in the current tab, except the pages opened from external apps — open these ones in new windows"},' + quote(
         EXISTS_KEY) + u':1}'
     self.assertEqual(test1, expected)
Example #23
def value2MVEL(value):
    """
    FROM PYTHON VALUE TO MVEL EQUIVALENT
    """
    if isinstance(value, datetime):
        return str(convert.datetime2milli(value)) + " /*" + value.format("yyNNNdd HHmmss") + "*/"        # TIME
    if isinstance(value, Duration):
        return str(convert.timedelta2milli(value)) + " /*" + str(value) + "*/"    # DURATION

    if Math.is_number(value):
        return str(value)
    return quote(value)
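Hedged examples (inputs are illustrative):

value2MVEL(42)        # -> "42"
value2MVEL(u"name")   # -> '"name"', a quoted MVEL string literal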
Example #24
def to_es14_script(self, schema, not_null=False, boolean=False, many=True):
    if isinstance(self.expr, Variable):
        if self.expr.var == "_id":
            return EsScript(type=BOOLEAN, expr="false", frum=self)
        else:
            columns = schema.leaves(self.expr.var)
            if len(columns) == 1:
                return EsScript(type=BOOLEAN, expr="doc[" + quote(first(columns).es_column) + "].isEmpty()", frum=self)
            else:
                return AndOp("and", [
                    EsScript(
                        type=BOOLEAN,
                        expr="doc[" + quote(c.es_column) + "].isEmpty()",
                        frum=self
                    )
                    for c in columns
                ]).partial_eval().to_es14_script(schema)
    elif isinstance(self.expr, Literal):
        return self.expr.missing().to_es14_script(schema)
    else:
        return self.expr.missing().partial_eval().to_es14_script(schema)
Example #25
def to_es_script(self, schema, not_null=False, boolean=True):
    if isinstance(self.expr, Variable):
        if self.expr.var == "_id":
            return EsScript(type=BOOLEAN, expr="false", frum=self)
        else:
            columns = schema.leaves(self.expr.var)
            if len(columns) == 1:
                return EsScript(type=BOOLEAN, expr="doc[" + quote(columns[0].es_column) + "].isEmpty()", frum=self)
            else:
                return AndOp("and", [
                    EsScript(
                        type=BOOLEAN,
                        expr="doc[" + quote(c.es_column) + "].isEmpty()",
                        frum=self
                    )
                    for c in columns
                ]).partial_eval().to_es_script(schema)
    elif isinstance(self.expr, Literal):
        return self.expr.missing().to_es_script(schema)
    else:
        return self.expr.missing().partial_eval().to_es_script(schema)
Example #27
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        for i, v in enumerate(self.values):
            es_query = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "script": 'doc['+quote(es_field)+'].values.contains(' + value2json(v) + ') ? 1 : 0'
                }}, es_query)
            }})

        return es_query
Example #28
def md5(source, chunk_size=CHUNK_SIZE):
    md5s = []
    for g, data in jx.chunk(source.read_bytes(), size=chunk_size):
        md5s.append(hashlib.md5(data).digest())

    if len(md5s) == 0:
        return '"d41d8cd98f00b204e9800998ecf8427e"'
    elif len(md5s) == 1:
        return quote(md5s[0].encode("hex"))
    else:
        Log.warning("not known to work")
        new_md5 = hashlib.md5(b"".join(md5s))
        return unicode(new_md5.hexdigest() + b"-" + str(len(md5s)))
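The multi-digest branch mirrors the S3 multipart-ETag convention (md5 of the concatenated chunk digests, suffixed with "-" and the chunk count); the warning above notes it is untested here. A hedged usage sketch, assuming a file-like object exposing read_bytes():

etag = md5(source_file)   # source_file is illustrative
# a source smaller than CHUNK_SIZE yields '"<hex md5>"', matching a simple S3 ETag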
Example #29
    def to_es_script(self, schema, not_null=False, boolean=False, many=True):
        if self.var == ".":
            return EsScript(type=OBJECT, expr="_source", frum=self)
        else:
            if self.var == "_id":
                return EsScript(
                    type=STRING,
                    expr='doc["_uid"].value.substring(doc["_uid"].value.indexOf(\'#\')+1)',
                    frum=self,
                    schema=schema,
                )

            columns = schema.values(self.var)
            acc = []
            for c in columns:
                varname = c.es_column
                frum = Variable(c.es_column)
                q = quote(varname)
                if many:
                    acc.append(
                        EsScript(
                            miss=frum.missing(),
                            type=c.jx_type,
                            expr="doc[" + q + "].values"
                            if c.jx_type != BOOLEAN
                            else "doc[" + q + "].value",
                            frum=frum,
                            schema=schema,
                            many=c.jx_type != BOOLEAN,
                        )
                    )
                else:
                    acc.append(
                        EsScript(
                            miss=frum.missing(),
                            type=c.jx_type,
                            expr="doc[" + q + "].value"
                            if c.jx_type != BOOLEAN
                            else "doc[" + q + "].value",
                            frum=frum,
                            schema=schema,
                            many=True,
                        )
                    )

            if len(acc) == 0:
                return NULL.to_es_script(schema)
            elif len(acc) == 1:
                return acc[0]
            else:
                return CoalesceOp(acc).to_es_script(schema)
Example #31
 def _convert(v):
     if v is None:
         return null_script
     if v is True:
         return true_script
     if v is False:
         return false_script
     class_ = v.__class__
     if class_ is text_type:
         return EsScript(type=STRING, expr=quote(v), frum=self, schema=schema)
     if class_ is int:
         return EsScript(
             type=INTEGER, expr=text_type(v), frum=self, schema=schema
         )
     if class_ is float:
         return EsScript(
             type=NUMBER, expr=text_type(v), frum=self, schema=schema
         )
     if class_ in data_types:
         return EsScript(
             type=OBJECT,
             expr="["
             + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items())
             + "]",
             frum=self,
             schema=schema,
         )
     if class_ in (FlatList, list, tuple):
         return EsScript(
             type=OBJECT,
             expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
             frum=self,
             schema=schema,
         )
     if class_ is Date:
         return EsScript(
             type=NUMBER, expr=text_type(v.unix), frum=self, schema=schema
         )
Example #32
def compileString2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})",
                                        {"path": quote(value)})
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(toTerm={"head": "", "body": value}, fromTerm=fromTerm)
Example #33
def quote_column(*path):
    if DEBUG:
        if not path:
            Log.error("expecting a name")
        for p in path:
            if not is_text(p):
                Log.error("expecting strings, not {{type}}",
                          type=p.__class__.__name__)
    try:
        output = ConcatSQL(SQL_SPACE,
                           JoinSQL(SQL_DOT, [SQL(quote(p)) for p in path]),
                           SQL_SPACE)
        return output
    except Exception as e:
        Log.error("Not expacted", cause=e)
Example #34
def compileString2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)})
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(
        toTerm={"head": "", "body": value},
        fromTerm=fromTerm
    )
Example #35
    def format(self):
        value = self.node.value
        if isinstance(value, str):
            if self.is_multiline_string:
                yield '"""' + value + '"""'
            else:
                yield quote(value)
        elif isinstance(value, (float, int)):
            yield str(value)
        elif value is None:
            yield "None"
        elif isinstance(value, type(...)):
            yield "..."
        elif isinstance(value, bytes):
            yield repr(value)
        else:
            Log.error("do not know how to handle {{type}}",
                      type=value.__class__.__name__)

        format_comment(self.line_comment)
Example #36
def compile_expression(source):
    """
    THIS FUNCTION IS ON ITS OWN FOR MINIMAL GLOBAL NAMESPACE

    :param source:  PYTHON SOURCE CODE
    :return:  PYTHON FUNCTION
    """
    fake_locals = {}
    try:
        exec(
            """
def output(row, rownum=None, rows=None):
    _source = """ + strings.quote(source) + """
    try:
        return """ + source + """
    except Exception as e:
        Log.error("Problem with dynamic function {{func|quote}}",  func=_source, cause=e)
""", globals(), fake_locals)
    except Exception as e:
        Log.error("Bad source: {{source}}", source=source, cause=e)
    return fake_locals['output']
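A hedged usage sketch (the expression is illustrative):

f = compile_expression("row['x'] + 1")
f({"x": 41})   # -> 42; rownum and rows default to None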
Example #37
 def to_es_script(self, schema, not_null=False, boolean=False, many=True):
     if is_op(self.expr, Variable_):
         if self.expr.var == "_id":
             return EsScript(type=BOOLEAN,
                             expr="false",
                             frum=self,
                             schema=schema)
         else:
             columns = schema.leaves(self.expr.var)
             return (AndOp([
                 EsScript(
                     type=BOOLEAN,
                     expr="doc[" + quote(c.es_column) + "].empty",
                     frum=self,
                     schema=schema,
                 ) for c in columns
             ]).partial_eval().to_es_script(schema))
     elif is_literal(self.expr):
         return self.expr.missing().to_es_script(schema)
     else:
         return self.expr.missing().partial_eval().to_es_script(schema)
Example #38
def compile_expression(source, function_name="output"):
    """
    THIS FUNCTION IS ON ITS OWN FOR MINIMAL GLOBAL NAMESPACE

    :param source:  PYTHON SOURCE CODE
    :param function_name:  OPTIONAL NAME TO GIVE TO OUTPUT FUNCTION
    :return:  PYTHON FUNCTION
    """

    fake_locals = {}
    try:
        exec(
            ("def " + function_name + "(row, rownum=None, rows=None):\n" +
             "    _source = " + strings.quote(source) + "\n" + "    try:\n" +
             "        return " + source + "\n" +
             "    except Exception as e:\n" +
             "        Log.error(u'Problem with dynamic function {{func|quote}}',  func=_source, cause=e)\n"
             ),
            GLOBALS,
            fake_locals,
        )
    except Exception as e:
        Log.error(u"Bad source: {{source}}", source=source, cause=e)
    return fake_locals["output"]
Example #39
def to_python(self, not_null=False, boolean=False, many=False):
    return ("wrap_leaves({" + ','.join(
        quote(t['name']) + ":" + t['value'].to_python()
        for t in self.terms) + "})")
Example #40
def to_python(self, not_null=False, boolean=False, many=False):
    return "re.match(" + quote(json2value(self.pattern.json) +
                               "$") + ", " + self.var.to_python() + ")"
Example #41
    def Parts2Term(self, domain):
        """
        TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

        CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION
        RETURN expression, function PAIR, WHERE
            expression - MVEL EXPRESSION
            function - TAKES RESULT OF expression AND RETURNS PART
        """
        fields = domain.dimension.fields

        term = []
        if len(split_field(self.fromData.name)) == 1 and fields:
            if isinstance(fields, Mapping):
                # CONVERT UNORDERED FIELD DEFS
                jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())])
            else:
                jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)])

            # NO LOOPS BECAUSE QUERY IS SHALLOW
            # DOMAIN IS FROM A DIMENSION, USE ITS FIELD DEFS TO PULL
            if len(es_fields) == 1:
                def fromTerm(term):
                    return domain.getPartByKey(term)

                return Data(
                    head="",
                    body='getDocValue('+quote(domain.dimension.fields[0])+')'
                ), fromTerm
            else:
                def fromTerm(term):
                    terms = [convert.pipe2value(t) for t in convert.pipe2value(term).split("|")]

                    candidate = dict(zip(jx_fields, terms))
                    for p in domain.partitions:
                        for k, t in candidate.items():
                            if p.value[k] != t:
                                break
                        else:
                            return p
                    if domain.type in ["uid", "default"]:
                        part = {"value": candidate}
                        domain.partitions.append(part)
                        return part
                    else:
                        return Null

                for f in es_fields:
                    term.append('Value2Pipe(getDocValue('+quote(f)+'))')

                return Data(
                    head="",
                    body='Value2Pipe('+('+"|"+'.join(term))+')'
                ), fromTerm
        else:
            for v in domain.partitions:
                term.append("if (" + _where(v.esfilter, lambda x: self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ")
            term.append(value2MVEL(domain.getKey(domain.NULL)))

            func_name = "_temp"+UID()
            return self.register_function("+\"|\"+".join(term))
Example #42
def DataClass(name, columns, constraint=None):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met)
    :return: The class that has been created
    """

    columns = wrap(
        [
            {"name": c, "required": True, "nulls": False, "type": object}
            if is_text(c)
            else c
            for c in columns
        ]
    )
    slots = columns.name
    required = wrap(
        filter(lambda c: c.required and not c.nulls and not c.default, columns)
    ).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.jx_type, object) for c in columns}

    code = expand_template(
        """
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}


    def _constraint(row, rownum, rows):
        try:
            return {{constraint_expr}}
        except Exception as e:
            return False

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})

""",
        {
            "class_name": name,
            "slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
            "required": "{" + (", ".join(quote(s) for s in required)) + "}",
            "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
            "defaults": Literal(defaults).to_python(),
            "len_slots": len(slots),
            "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
            "assign": "; ".join(
                "_set(output, " + quote(s) + ", self." + s + ")" for s in slots
            ),
            "types": "{"
            + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items()))
            + "}",
            "constraint_expr": Python[jx_expression(constraint)].to_python(),
            "constraint": value2json(constraint),
        },
    )

    output = _exec(code, name)
    register_data(output)
    return output
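A hedged usage sketch; the class and its columns are illustrative:

Point = DataClass("Point", ["x", "y"])
p = Point(x=1, y=2)
p.x        # -> 1
p["y"]     # -> 2
p.z = 3    # -> Log.error: "z" not valid attribute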
Example #43
def DataClass(name, columns, constraint=None):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met)
    :return: The class that has been created
    """

    columns = wrap([{
        "name": c,
        "required": True,
        "nulls": False,
        "type": object
    } if is_text(c) else c for c in columns])
    slots = columns.name
    required = wrap(
        filter(lambda c: c.required and not c.nulls and not c.default,
               columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.jx_type, object) for c in columns}

    code = expand_template(
        """
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}


    def _constraint(row, rownum, rows):
        try:
            return {{constraint_expr}}
        except Exception as e:
            Log.error(
                "constraint\\n{" + "{code}}\\nnot satisfied {" + "{expect}}\\n{" + "{value|indent}}",
                code={{constraint_expr|quote}}, 
                expect={{constraint}}, 
                value=row,
                cause=e
            )

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        self._constraint(0, [self])

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        self._constraint(0, [self])

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})

""",
        {
            "class_name":
            name,
            "slots":
            "(" + (", ".join(quote(s) for s in slots)) + ")",
            "required":
            "{" + (", ".join(quote(s) for s in required)) + "}",
            "nulls":
            "{" + (", ".join(quote(s) for s in nulls)) + "}",
            "defaults":
            Literal(defaults).to_python(),
            "len_slots":
            len(slots),
            "dict":
            "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
            "assign":
            "; ".join("_set(output, " + quote(s) + ", self." + s + ")"
                      for s in slots),
            "types":
            "{" +
            (",".join(quote(k) + ": " + v.__name__
                      for k, v in types.items())) + "}",
            "constraint_expr":
            Python[jx_expression(not ENABLE_CONSTRAINTS
                                 or constraint)].to_python(),
            "constraint":
            value2json(constraint),
        },
    )

    output = _exec(code, name)
    register_data(output)
    return output
Example #44
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif is_data(value):
            try:
                items = sort_using_key(value.items(), lambda r: r[0])
                values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not is_text(k) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )

                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif value.__class__ in (binary_type, text_type):
            if is_binary(value):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f)
                    return "null"
        elif is_list(value):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(1, min(ARRAY_MAX_COLUMNS, int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning("problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)

    except Exception as e:
        problem_serializing(value, e)
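A hedged illustration of the output shape (exact spacing depends on the PRETTY_* constants):

pretty_json({"b": [1, 2, 3], "a": "x"})
# ->
# {
#     "a": "x",
#     "b": [1, 2, 3]
# }
# keys are sorted; short arrays of tiny values stay on one line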
Example #45
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    """
    :param value: THE DATA STRUCTURE TO ENCODE
    :param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
    :param path: list OF CURRENT PATH
    :param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
    :param buffer: UnicodeBuilder OBJECT
    :return:
    """
    try:
        # from jx_base import Column
        if sub_schema.__class__.__name__ == 'Column':
            value_json_type = python_type_to_json_type[value.__class__]
            column_json_type = es_type_to_json_type[sub_schema.es_type]

            if value_json_type == column_json_type:
                pass  # ok
            elif value_json_type == NESTED and all(python_type_to_json_type[v.__class__] == column_json_type for v in value if v != None):
                pass  # empty arrays can be anything
            else:
                from mo_logs import Log

                Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.name)

            sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema}

        if value == None:
            from mo_logs import Log
            Log.error("can not encode null (missing) values")
        elif value is True:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'true}')
            return
        elif value is False:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'false}')
            return

        _type = value.__class__
        if _type in (dict, Data):
            if sub_schema.__class__.__name__ == 'Column':
                from mo_logs import Log
                Log.error("Can not handle {{column|json}}", column=sub_schema)

            if NESTED_TYPE in sub_schema:
                # PREFER NESTED, WHEN SEEN BEFORE
                if value:
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[')
                    _dict2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                    append(buffer, ']' + COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, text_type(len(value)))
                    append(buffer, '}')
                else:
                    # SINGLETON LIST
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}]')
                    append(buffer, COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
            else:
                if EXISTS_TYPE not in sub_schema:
                    sub_schema[EXISTS_TYPE] = {}
                    net_new_properties.append(path + [EXISTS_TYPE])

                if value:
                    _dict2json(value, sub_schema, path, net_new_properties, buffer)
                else:
                    append(buffer, '{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
        elif _type is binary_type:
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            try:
                v = utf82unicode(value)
            except Exception as e:
                raise problem_serializing(value, e)

            for c in v:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type is text_type:
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            for c in value:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type in (int, long):
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])

            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, text_type(value))
            append(buffer, '}')
        elif _type in (float, Decimal):
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value))
            append(buffer, '}')
        elif _type in (set, list, tuple, FlatList):
            if len(value) == 0:
                append(buffer, '{')
                append(buffer, QUOTED_EXISTS_TYPE)
                append(buffer, '0}')
            elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value):
                # THIS IS NOT DONE BECAUSE
                if len(value) == 1:
                    if NESTED_TYPE in sub_schema:
                        append(buffer, '{')
                        append(buffer, QUOTED_NESTED_TYPE)
                        _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                        append(buffer, '}')
                    else:
                        # NO NEED TO NEST, SO DO NOT DO IT
                        typed_encode(value[0], sub_schema, path, net_new_properties, buffer)
                else:
                    if NESTED_TYPE not in sub_schema:
                        sub_schema[NESTED_TYPE] = {}
                        net_new_properties.append(path + [NESTED_TYPE])
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                    append(buffer, '}')
            else:
                # ALLOW PRIMITIVE MULTIVALUES
                value = [v for v in value if v != None]
                types = list(set(json_type_to_inserter_type[python_type_to_json_type[v.__class__]] for v in value))
                if len(types) == 0:  # HANDLE LISTS WITH Nones IN THEM
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[]}')
                elif len(types) > 1:
                    _list2json(value, sub_schema, path + [NESTED_TYPE], net_new_properties, buffer)
                else:
                    element_type = types[0]
                    if element_type not in sub_schema:
                        sub_schema[element_type] = True
                        net_new_properties.append(path + [element_type])
                    append(buffer, '{')
                    append(buffer, quote(element_type))
                    append(buffer, COLON)
                    _multivalue2json(value, sub_schema[element_type], path + [element_type], net_new_properties, buffer)
                    append(buffer, '}')
        elif _type is date:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(time.mktime(value.timetuple())))
            append(buffer, '}')
        elif _type is datetime:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(time.mktime(value.timetuple())))
            append(buffer, '}')
        elif _type is Date:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.unix))
            append(buffer, '}')
        elif _type is timedelta:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.total_seconds()))
            append(buffer, '}')
        elif _type is Duration:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.seconds))
            append(buffer, '}')
        elif _type is NullType:
            append(buffer, 'null')
        elif hasattr(value, '__data__'):
            typed_encode(value.__data__(), sub_schema, path, net_new_properties, buffer)
        elif hasattr(value, '__iter__'):
            if NESTED_TYPE not in sub_schema:
                sub_schema[NESTED_TYPE] = {}
                net_new_properties.append(path + [NESTED_TYPE])

            append(buffer, '{')
            append(buffer, QUOTED_NESTED_TYPE)
            _iter2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
            append(buffer, '}')
        else:
            from mo_logs import Log

            Log.error(text_type(repr(value)) + " is not JSON serializable")
    except Exception as e:
        from mo_logs import Log

        Log.error(text_type(repr(value)) + " is not JSON serializable", cause=e)
Example #46
def string2quote(value):
    if value == None:
        return "None"
    return quote(value)
Example #47
    def _update_cardinality(self, column):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        if column.es_index in self.index_does_not_exist:
            return

        if column.jx_type in STRUCT:
            Log.error("not supported")
        try:
            if column.es_index == "meta.columns":
                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.columns, column.es_column) if g[column.es_column] != None])
                self.meta.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.meta.columns),
                        "cardinality": len(partitions),
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            if column.es_index == "meta.tables":
                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.tables, column.es_column) if g[column.es_column] != None])
                self.meta.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.meta.tables),
                        "cardinality": len(partitions),
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return

            es_index = column.es_index.split(".")[0]

            is_text = [cc for cc in self.meta.columns if cc.es_column == column.es_column and cc.es_type == "text"]
            if is_text:
                # text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": {"filter": {"match_all": {}}}
                    },
                    "size": 0
                })
                count = result.hits.total
                cardinality = max(1001, count)
                multi = 1001
            elif column.es_column == "_id":
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "query": {"match_all": {}},
                    "size": 0
                })
                count = cardinality = result.hits.total
                multi = 1
            elif column.es_type == BOOLEAN:
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": _counting_query(column)
                    },
                    "size": 0
                })
                count = result.hits.total
                cardinality = 2
                multi = 1
            else:
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": _counting_query(column),
                        "multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
                    },
                    "size": 0
                })
                agg_results = result.aggregations
                count = result.hits.total
                cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
                multi = int(coalesce(agg_results.multi.value, 1))
                if cardinality == None:
                   Log.error("logic error")

            query = Data(size=0)

            if column.es_column == "_id":
                self.meta.columns.update({
                    "set": {
                        "count": cardinality,
                        "cardinality": cardinality,
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "multi": multi,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "multi": multi,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif len(column.nested_path) != 1:
                query.aggs["_"] = {
                    "nested": {"path": column.nested_path[0]},
                    "aggs": {"_nested": {"terms": {"field": column.es_column}}}
                }
            elif cardinality == 0:
                query.aggs["_"] = {"terms": {"field": column.es_column}}
            else:
                query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}

            result = self.es_cluster.post("/" + es_index + "/_search", data=query)

            aggs = result.aggregations._
            if aggs._nested:
                parts = jx.sort(aggs._nested.buckets.key)
            else:
                parts = jx.sort(aggs.buckets.key)

            self.meta.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "multi": multi,
                    "partitions": parts,
                    "last_updated": Date.now()
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
        except Exception as e:
            # CAN NOT IMPORT: THE TEST MODULE SETS UP LOGGING
            # from tests.test_jx import TEST_TABLE
            e = Except.wrap(e)
            TEST_TABLE = "testdata"
            is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
            is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
            if is_missing_index and is_test_table:
                # WE EXPECT TEST TABLES TO DISAPPEAR
                self.meta.columns.update({
                    "clear": ".",
                    "where": {"eq": {"es_index": column.es_index}}
                })
                self.index_does_not_exist.add(column.es_index)
            else:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "multi",
                        "partitions",
                    ],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
Beispiel #48
        append(buffer, QUOTED_EXISTS_TYPE)
        append(buffer, '1}')




TYPE_PREFIX = "~"  # u'\u0442\u0443\u0440\u0435-'  # "туре"
BOOLEAN_TYPE = TYPE_PREFIX + "b~"
NUMBER_TYPE = TYPE_PREFIX + "n~"
STRING_TYPE = TYPE_PREFIX + "s~"
NESTED_TYPE = TYPE_PREFIX + "N~"
EXISTS_TYPE = TYPE_PREFIX + "e~"

append = UnicodeBuilder.append

QUOTED_BOOLEAN_TYPE = quote(BOOLEAN_TYPE) + COLON
QUOTED_NUMBER_TYPE = quote(NUMBER_TYPE) + COLON
QUOTED_STRING_TYPE = quote(STRING_TYPE) + COLON
QUOTED_NESTED_TYPE = quote(NESTED_TYPE) + COLON
QUOTED_EXISTS_TYPE = quote(EXISTS_TYPE) + COLON

inserter_type_to_json_type = {
    BOOLEAN_TYPE: BOOLEAN,
    NUMBER_TYPE: NUMBER,
    STRING_TYPE: STRING
}

json_type_to_inserter_type = {
    BOOLEAN: BOOLEAN_TYPE,
    INTEGER: NUMBER_TYPE,
    NUMBER: NUMBER_TYPE,
    STRING: STRING_TYPE,
    NESTED: NESTED_TYPE,
    EXISTS: EXISTS_TYPE
}
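The QUOTED_* constants precompute a JSON-escaped key plus trailing colon, so the encoder can append a property prefix to the buffer without re-quoting on every call. A minimal sketch with json.dumps standing in for quote():

import json

COLON = ":"
NUMBER_TYPE = "~n~"
QUOTED_NUMBER_TYPE = json.dumps(NUMBER_TYPE) + COLON
print(QUOTED_NUMBER_TYPE)  # "~n~":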
Beispiel #49
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                items = sort_using_key(list(value.items()), lambda r: r[0])
                values = [
                    encode_basestring(k) + PRETTY_COLON +
                    indent(pretty_json(v)).strip() for k, v in items
                    if v != None
                ]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + INDENT + (",\n" +
                                             INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not isinstance(k, text_type) for k in value.keys()):
                    Log.error("JSON must have string keys: {{keys}}:",
                              keys=[k for k in value.keys()],
                              cause=e)

                Log.error("problem making dict pretty: keys={{keys}}:",
                          keys=[k for k in value.keys()],
                          cause=e)
        elif value in (None, Null):
            return "null"
        elif isinstance(value, (text_type, binary_type)):
            if isinstance(value, binary_type):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",
                             length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not even explicit convert {{type}}",
                                type=f.__class__.__name__,
                                cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join(
                    [indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(
                    *[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(
                            floor((ARRAY_ROW_LENGTH + 2.0) /
                                  float(max_len +
                                        2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join(
                        [indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(
                        j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns))
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p))
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)

    except Exception as e:
        problem_serializing(value, e)
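A hedged usage sketch: assuming the function above matches mo-json's pretty_json (the import path is an assumption), single-pair dicts stay on one line and short arrays are packed onto one row:

from mo_json.encoder import pretty_json  # assumed import path

print(pretty_json({"a": 1}))   # {"a": 1}
print(pretty_json([1, 2, 3]))  # [1, 2, 3]  (tiny values share a row)

Beispiel #50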
def assertAlmostEqual(test,
                      expected,
                      digits=None,
                      places=None,
                      msg=None,
                      delta=None):
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and (is_null_op(expected) or expected is None):
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test,
                                   expected,
                                   msg=msg,
                                   digits=digits,
                                   places=places,
                                   delta=delta)
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            for k, e in unwrap(expected).items():
                t = test.get(k)
                assertAlmostEqual(t,
                                  e,
                                  msg=coalesce(msg, "") + "key " + quote(k) +
                                  ": ",
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif is_data(expected):
            if is_many(test):
                test = list(test)
                if len(test) != 1:
                    Log.error("Expecting data, not a list")
                test = test[0]
            for k, e in expected.items():
                if is_text(k):
                    t = mo_dots.get_attr(test, literal_field(k))
                else:
                    t = test[k]
                assertAlmostEqual(t,
                                  e,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(to_data(t) for t in test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected)

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t,
                                          e,
                                          msg=msg,
                                          digits=digits,
                                          places=places,
                                          delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error(
                        "Sets do not match. {{value|json}} not found in {{test|json}}",
                        value=e,
                        test=test)

        elif isinstance(expected, types.FunctionType):
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for t, e in zip_longest(test, expected):
                assertAlmostEqual(t,
                                  e,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        else:
            assertAlmostEqualValue(test,
                                   expected,
                                   msg=msg,
                                   digits=digits,
                                   places=places,
                                   delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e)
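A hedged usage sketch, assuming the function above matches mo-testing's fuzzytestcase.assertAlmostEqual (the import path is an assumption): when expected is a dict, only its keys are checked, so extra keys in test are ignored:

from mo_testing.fuzzytestcase import assertAlmostEqual  # assumed module path

assertAlmostEqual({"a": 1.00004, "extra": True}, {"a": 1}, places=4)  # passes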
Beispiel #51
    def append_query(self, query_path, es_query):
        es_field = first(self.query.frum.schema.leaves(self.var)).es_column

        return Aggs().add(TermsAggs("_match", {
            "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
        }, self).add(es_query))
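expand_template performs moustache-style substitution (from mo-logs); a minimal sketch with a hypothetical template standing in for LIST_TO_PIPE:

from mo_logs.strings import expand_template

print(expand_template("join({{expr}}, '|')", {"expr": 'doc["f"].values'}))
# join(doc["f"].values, '|')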
Beispiel #52
def to_python(self, not_null=False, boolean=False, many=False):
    v = self.var.to_python()
    return "((" + quote(self.substring) + " in " + v + ") if " + v + "!=None else False)"
Beispiel #53
def _where(esFilter, _translate):
    if not esFilter or esFilter is True:
        return "true"

    keys = esFilter.keys()
    if len(keys) != 1:
        Log.error("Expecting only one filter aggregate")

    op = keys[0]
    if op == "and":
        list = esFilter[op]
        if not list:
            return "true"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " && ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "or":
        list = esFilter[op]
        if not list:
            return "false"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " || ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "not":
        return "!(" + _where(esFilter[op, _translate]) + ")"
    elif op == "term":
        pair = esFilter[op]
        if len(pair.keys()) == 1:
            return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0]
        else:
            return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")"
    elif op == "terms":
        output = []
        for variableName, valueList in esFilter[op].items():
            if not valueList:
                Log.error("Expecting something in 'terms' array")
            if len(valueList) == 1:
                output.append(_translate(variableName) + "==" + value2MVEL(valueList[0]))
            else:
                output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")")
        return " && ".join(output)
    elif op == "exists":
        # "exists":{"field":"myField"}
        pair = esFilter[op]
        variableName = pair.field
        return "(" + _translate(variableName) + "!=null)"
    elif op == "missing":
        fieldName = _translate(esFilter[op].field)
        testExistence = coalesce(esFilter[op].existence, True)
        testNull = coalesce(esFilter[op].null_value, True)

        output = []
        if testExistence and not testNull:
            output.append("(" + fieldName.replace(".?", ".") + " == empty)")        # REMOVE THE .? SO WE REFER TO THE FIELD, NOT GET THE VALUE
        if testNull:
            output.append("(" + fieldName + "==null)")
        return " || ".join(output)
    elif op == "range":
        pair = esFilter[op]
        ranges = []

        for variableName, r in pair.items():
            if r.gte:
                ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName))
            elif r.gt:
                ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName))
            elif r["from"]:
                if r.include_lower == None or r.include_lower:
                    ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName))

            if r.lte:
                ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName))
            elif r.lt:
                ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName))
            elif r["from"]:
                if r.include_lower == None or r.include_lower:
                    ranges.append(value2MVEL(r["from"]) + ">=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["from"]) + ">" + _translate(variableName))

        return "("+" && ".join(ranges)+")"

    elif op == "script":
        script = esFilter[op].script
        return _translate(script)
    elif op == "prefix":
        pair = esFilter[op]
        variableName, value = pair.items()[0]
        return _translate(variableName) + ".startsWith(" + quote(value) + ")"
    elif op == "match_all":
        return "true"
    else:
        Log.error("'" + op + "' is an unknown aggregate")

    return ""
Beispiel #54
def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    select = listwrap(query.select)

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            if schema.query_path == ".":
                s.pull = jx_expression_to_function("doc_count")
            else:
                s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
        elif isinstance(s.value, Variable):
            if s.aggregate == "count":
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            formula.append(s)

    for canonical_name, many in new_select.items():
        for s in many:
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                        canonical_names.append(cn+ ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                es_query.aggs[key].percentiles.tdigest.compression = 2
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for column in columns:
                    script = {"scripted_metric": {
                        'init_script': 'params._agg.terms = new HashSet()',
                        'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);',
                        'combine_script': 'return params._agg.terms.toArray()',
                        'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                    }}
                    stats_name = encode_property(column.es_column)
                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = script
                        pulls.append(jx_expression_to_function(stats_name + ".value"))
                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": script}
                        }
                        pulls.append(jx_expression_to_function(stats_name + "._nested.value"))

                if len(pulls) == 0:
                    s.pull = NULL
                elif len(pulls) == 1:
                    s.pull = pulls[0]
                else:
                    s.pull = lambda row: UNION(p(row) for p in pulls)
            else:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                elif len(columns) < 1:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    s.pull = jx_expression_to_function({"null":{}})
                else:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                    s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)

        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = "doc_count"
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
                selfy = s.value.partial_eval().to_es_script(schema).expr

                script = {"scripted_metric": {
                    'init_script': 'params._agg.best = ' + nully + ';',
                    'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";",
                    'combine_script': 'return params._agg.best',
                    'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
                }}
                if schema.query_path[0] == ".":
                    es_query.aggs[canonical_name] = script
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
                else:
                    es_query.aggs[canonical_name] = {
                        "nested": {"path": schema.query_path[0]},
                        "aggs": {"_nested": script}
                    }
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
            else:
               Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    if decoders:
        for d in jx.reverse(decoders[0]):
            es_query = d.append_query(es_query, start)
            start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter = AndOp("and", split_where[0]).to_esfilter(schema)
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
Beispiel #56
    def _update_cardinality(self, column):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        if column.es_index in self.index_does_not_exist:
            return

        if column.type in STRUCT:
            Log.error("not supported")
        try:
            if column.es_index == "meta.columns":
                with self.meta.columns.locker:
                    partitions = jx.sort([
                        g[column.es_column] for g, _ in jx.groupby(
                            self.meta.columns, column.es_column)
                        if g[column.es_column] != None
                    ])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.columns),
                            "cardinality": len(partitions),
                            "multi": 1,
                            "last_updated": Date.now()
                        },
                        "where": {
                            "eq": {
                                "es_index": column.es_index,
                                "es_column": column.es_column
                            }
                        }
                    })
                return
            if column.es_index == "meta.tables":
                with self.meta.columns.locker:
                    partitions = jx.sort([
                        g[column.es_column] for g, _ in jx.groupby(
                            self.meta.tables, column.es_column)
                        if g[column.es_column] != None
                    ])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.tables),
                            "cardinality": len(partitions),
                            "multi": 1,
                            "last_updated": Date.now()
                        },
                        "where": {
                            "eq": {
                                "es_index": column.es_index,
                                "es_column": column.es_column
                            }
                        }
                    })
                return

            es_index = column.es_index.split(".")[0]

            is_text = [
                cc for cc in self.abs_columns
                if cc.es_column == column.es_column and cc.type == "text"
            ]
            if is_text:
                # text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
                result = self.default_es.post("/" + es_index + "/_search",
                                              data={
                                                  "aggs": {
                                                      "count": {
                                                          "filter": {
                                                              "match_all": {}
                                                          }
                                                      }
                                                  },
                                                  "size": 0
                                              })
                count = result.hits.total
                cardinality = 1001
                multi = 1001
            elif column.es_column == "_id":
                result = self.default_es.post("/" + es_index + "/_search",
                                              data={
                                                  "query": {
                                                      "match_all": {}
                                                  },
                                                  "size": 0
                                              })
                count = cardinality = result.hits.total
                multi = 1
            else:
                result = self.default_es.post(
                    "/" + es_index + "/_search",
                    data={
                        "aggs": {
                            "count": _counting_query(column),
                            "multi": {
                                "max": {
                                    "script":
                                    "doc[" + quote(column.es_column) +
                                    "].values.size()"
                                }
                            }
                        },
                        "size": 0
                    })
                r = result.aggregations.count
                count = result.hits.total
                cardinality = coalesce(r.value, r._nested.value, r.doc_count)
                multi = coalesce(result.aggregations.multi.value, 1)
                if cardinality == None:
                    Log.error("logic error")

            query = Data(size=0)

            if column.es_column == "_id":
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": cardinality,
                            "cardinality": cardinality,
                            "multi": 1,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": column.es_index,
                                "es_column": column.es_column
                            }
                        }
                    })
                return
            elif (
                cardinality > 1000
                or (count >= 30 and cardinality == count)
                or (count >= 1000 and cardinality / count > 0.99)
            ):
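                # NEARLY-UNIQUE OR HIGH-CARDINALITY COLUMNS: RECORD THE STATS, BUT DO NOT STORE PARTITIONS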
                if DEBUG:
                    Log.note("{{table}}.{{field}} has {{num}} parts",
                             table=column.es_index,
                             field=column.es_column,
                             num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "multi": multi,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": column.es_index,
                                "es_column": column.es_column
                            }
                        }
                    })
                return
            elif column.type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
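                # NUMERIC COLUMNS WITH MANY DISTINCT VALUES ARE NOT WORTH ENUMERATING EITHER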
                if DEBUG:
                    Log.note("{{field}} has {{num}} parts",
                             field=column.es_index,
                             num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "multi": multi,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": column.es_index,
                                "es_column": column.es_column
                            }
                        }
                    })
                return
            elif len(column.nested_path) != 1:
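                # NESTED COLUMNS REQUIRE THE terms AGG TO BE WRAPPED IN A nested AGG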
                query.aggs["_"] = {
                    "nested": {
                        "path": column.nested_path[0]
                    },
                    "aggs": {
                        "_nested": {
                            "terms": {
                                "field": column.es_column
                            }
                        }
                    }
                }
            elif cardinality == 0:
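                # cardinality == 0 WOULD PUT size: 0 ON THE terms AGG, SO OMIT size AND ACCEPT THE DEFAULT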
                query.aggs["_"] = {"terms": {"field": column.es_column}}
            else:
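                # SMALL, KNOWN CARDINALITY: ASK FOR EXACTLY THAT MANY terms SO ALL PARTS COME BACK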
                query.aggs["_"] = {
                    "terms": {
                        "field": column.es_column,
                        "size": cardinality
                    }
                }

            result = self.default_es.post("/" + es_index + "/_search", data=query)

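            # NESTED RESULTS ARE ONE LEVEL DEEPER THAN PLAIN terms RESULTS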
            aggs = result.aggregations._
            if aggs._nested:
                parts = jx.sort(aggs._nested.buckets.key)
            else:
                parts = jx.sort(aggs.buckets.key)

            if DEBUG:
                Log.note("{{field}} has {{parts}}",
                         field=column.names["."],
                         parts=parts)
            with self.meta.columns.locker:
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "multi": multi,
                        "partitions": parts,
                        "last_updated": Date.now()
                    },
                    "where": {
                        "eq": {
                            "es_index": column.es_index,
                            "es_column": column.es_column
                        }
                    }
                })
        except Exception as e:
            # CANNOT IMPORT: THE TEST MODULES SET UP LOGGING
            # from tests.test_jx import TEST_TABLE
            TEST_TABLE = "testdata"
            is_missing_index = any(
                w in e
                for w in ["IndexMissingException", "index_not_found_exception"]
            )
            is_test_table = any(
                column.es_index.startswith(t)
                for t in [TEST_TABLE_PREFIX, TEST_TABLE])
            if is_missing_index and is_test_table:
                # WE EXPECT TEST TABLES TO DISAPPEAR
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "clear": ".",
                        "where": {
                            "eq": {
                                "es_index": column.es_index
                            }
                        }
                    })
                self.index_does_not_exist.add(column.es_index)
            else:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "multi",
                        "partitions",
                    ],
                    "where": {
                        "eq": {
                            "names.\\.": ".",
                            "es_index": column.es_index,
                            "es_column": column.es_column
                        }
                    }
                })
                Log.warning(
                    "Could not get {{col.es_index}}.{{col.es_column}} info",
                    col=column,
                    cause=e)
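
The helper _counting_query(column) used above is defined elsewhere in the module and is not shown in this listing. A minimal sketch of a compatible implementation, inferred only from how its result is read back (r.value, r._nested.value, r.doc_count), so an assumption rather than the original code:

def _counting_query(c):
    # ASSUMED SHAPE, NOT THE ORIGINAL: DISTINCT-VALUE COUNT FOR ONE COLUMN
    if len(c.nested_path) != 1:
        # NESTED COLUMNS: WRAP IN A nested AGG SO THE RESULT APPEARS AT r._nested.value
        return {
            "nested": {"path": c.nested_path[0]},
            "aggs": {"_nested": {"cardinality": {"field": c.es_column}}}
        }
    # PLAIN COLUMNS: THE RESULT APPEARS AT r.value
    return {"cardinality": {"field": c.es_column}}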
Beispiel #57
0
def quote_table(column):
    if _no_need_to_quote.match(column):
        return column
    return quote(column)
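
A minimal, self-contained usage sketch (not part of the original listing). _no_need_to_quote and quote are defined elsewhere in the module, so the stand-ins below are assumptions chosen only to make the example runnable:

import re

# ASSUMPTION: SIMPLE IDENTIFIERS NEED NO QUOTING; THE REAL PATTERN MAY DIFFER
_no_need_to_quote = re.compile(r"^\w+$", re.UNICODE)

def quote(value):
    # HYPOTHETICAL STAND-IN: DOUBLE-QUOTE THE NAME, ESCAPING EMBEDDED QUOTES
    return '"' + value.replace('"', '""') + '"'

def quote_table(column):
    if _no_need_to_quote.match(column):
        return column
    return quote(column)

print(quote_table("my_table"))   # my_table      (NO QUOTING NEEDED)
print(quote_table("my table"))   # "my table"    (SPACE FORCES QUOTING)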