def to_esfilter(self, schema):
    """
    Convert this expression into an Elasticsearch filter clause.

    :param schema: schema used to map the variable name to its es_column
    :return: dict in ES filter syntax ({"term": ...} or {"terms": ...})

    When the value is a plain Variable, emit a native term/terms filter;
    otherwise fall back to a Painless script filter.
    """
    if is_op(self.value, Variable_):
        var = self.value.var
        cols = schema.leaves(var)
        if not cols:
            Log.error("expecting {{var}} to be a column", var=var)
        col = first(cols)
        var = col.es_column

        if col.jx_type == BOOLEAN:
            if is_literal(self.superset) and not is_sequence(self.superset.value):
                # single literal -> exact-match term filter
                return {"term": {var: value2boolean(self.superset.value)}}
            else:
                # BUGFIX: map() returns a lazy iterator in Python 3, which is
                # not JSON-serializable; materialize it into a list
                return {"terms": {var: list(map(value2boolean, self.superset.value))}}
        else:
            if is_literal(self.superset) and not is_sequence(self.superset.value):
                return {"term": {var: self.superset.value}}
            else:
                return {"terms": {var: self.superset.value}}
    else:
        # not a simple column reference; delegate to a script filter
        return Painless[self].to_es_script(schema).to_esfilter(schema)
def define(cls, expr):
    """
    Parse a `between` JSON clause into a BetweenOp expression.

    Accepts either the positional form [value, prefix, suffix] or the
    data form {variable: [prefix, suffix]}.
    """
    term = expr.between

    if is_sequence(term):
        # positional form: [value, prefix, suffix]
        return cls.lang[BetweenOp(
            value=jx_expression(term[0]),
            prefix=jx_expression(term[1]),
            suffix=jx_expression(term[2]),
            default=jx_expression(expr.default),
            start=jx_expression(expr.start),
        )]

    if is_data(term):
        var, vals = term.items()[0]
        if not is_sequence(vals) or len(vals) != 2:
            Log.error(
                "`between` parameters are expected to be in {var: [prefix, suffix]} form"
            )
        # data form: {variable: [prefix, suffix]}
        return cls.lang[BetweenOp(
            value=Variable(var),
            prefix=Literal(vals[0]),
            suffix=Literal(vals[1]),
            default=jx_expression(expr.default),
            start=jx_expression(expr.start),
        )]

    Log.error(
        "`between` parameters are expected to be in {var: [prefix, suffix]} form"
    )
def value2key(keys, val):
    """
    Build a tuple key from `val` along the given key columns.

    Single-column keys produce a 1-tuple; multi-column keys produce a
    tuple in `keys` order.
    """
    if len(keys) == 1:
        # single key column -> 1-tuple
        if is_data(val):
            return (get_attr(val, keys[0]),)
        if is_sequence(val):
            return (val[0],)
        return (val,)

    # multiple key columns
    if is_data(val):
        return tuple(val[name] for name in keys)
    if is_sequence(val):
        return tuple(val)
    Log.error("do not know what to do here")
def value2key(keys, val):
    """
    Project `val` onto the given key columns.

    Single-column keys return the bare value; multi-column keys return a
    datawrap'd mapping of column name to value.
    """
    if len(keys) == 1:
        # one key column: return the raw value, not a wrapper
        if is_data(val):
            return val[keys[0]]
        if is_sequence(val):
            return val[0]
        return val

    # several key columns: build {name: value} and wrap it
    if is_data(val):
        return datawrap(dict((name, val[name]) for name in keys))
    if is_sequence(val):
        return datawrap({name: v for name, v in zip(keys, val)})
    Log.error("do not know what to do here")
def __getitem__(self, index):
    """
    Index into the matrix.

    A scalar index returns a cell (or a 1-d Matrix for a slice); a
    sequence index is interpreted per-dimension and may return either a
    simple value or a lower-rank Matrix.
    """
    if not is_sequence(index):
        if isinstance(index, slice):
            # slice of the first dimension -> 1-d Matrix
            sub = self.cube[index]
            sliced = Matrix()
            sliced.num = 1
            sliced.dims = (len(sub),)
            sliced.cube = sub
            return sliced
        return self.cube[index]

    if len(index) == 0:
        # empty selector: whole cube
        return self.cube

    dims, cube = _getitem(self.cube, index)
    if len(dims) == 0:
        return cube  # SIMPLE VALUE

    result = Matrix(dims=[])
    result.num = len(dims)
    result.dims = dims
    result.cube = cube
    return result
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if is_data(value):
        if depth == 0:
            # too deep: collapse the whole mapping into (truncated) JSON text
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {
            key: _deep_json_to_string(child, depth - 1)
            for key, child in value.items()
        }
    if is_sequence(value):
        # sequences are always stringified, regardless of depth
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    # anything else: serialize to JSON text
    return strings.limit(value2json(value), LOG_STRING_LENGTH)
def __init__(self, concat):
    """Hold a sequence of SQL fragments to be joined later."""
    SQL.__init__(self)
    # materialize non-sequences (e.g. generators) into a list
    parts = concat if is_sequence(concat) else list(concat)
    if DEBUG:
        for part in parts:
            if not isinstance(part, SQL):
                Log.error("Can only join other SQL")
    self.concat = parts
def __init__(self, terms):
    """
    Binary operator: accepts a [lhs, rhs] pair, or a one-entry mapping
    where the key becomes the rhs and the value the lhs.
    """
    Expression.__init__(self, terms)
    if is_sequence(terms):
        self.lhs, self.rhs = terms
        return
    if is_data(terms):
        # mapping form: {rhs: lhs}
        pair = terms.items()[0]
        self.rhs = pair[0]
        self.lhs = pair[1]
        return
    Log.error("logic error")
def parse_hg_date(date):
    """Convert an hg changeset date (text, or (timestamp, tz) pair) into a Date."""
    if is_text(date):
        return Date(date)
    if is_sequence(date):
        # FIRST IN TUPLE (timestamp, time_zone) TUPLE, WHERE timestamp IS GMT
        return Date(date[0])
    Log.error("Can not deal with date like {{date|json}}", date=date)
def __call__(self, row, rownum=None, rows=None):
    """
    Fetch the value at path self.var from `row`, descending one path
    segment at a time; single-element lists are unwrapped.
    """
    value = row
    for segment in split_field(self.var):
        value = value.get(segment)
        if value is None:
            return None
    if is_sequence(value) and len(value) == 1:
        # unwrap singleton lists
        return value[0]
    return value
def __getitem__(self, key):
    """Return a (copied, wrapped) list of rows matching `key`; [] when absent."""
    try:
        if is_sequence(key) and len(key) < len(self._keys):
            # RETURN ANOTHER Index
            raise NotImplementedError()
        full_key = value2key(self._keys, key)
        matches = self._data.get(full_key, [])
        return wrap(copy(matches))
    except Exception as e:
        Log.error("something went wrong", e)
def _replace_locals(node, doc_path):
    # Resolve "$ref" and "$concat" directives inside `node`, producing a deep copy.
    # `doc_path` is the chain of ancestor nodes, innermost first; it is used to
    # resolve relative fragments.  NOTE(review): assumes `ref.fragment` is a
    # non-empty string of dot-prefixed path text — confirm with callers.
    if is_data(node):  # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                # remember the reference; resolved after the rest of the keys
                ref = v
            elif k == "$concat":
                if not is_sequence(v):
                    Log.error("$concat expects an array of strings")
                # short-circuits: the whole node is replaced by the joined text
                return coalesce(node.get("separator"), "").join(v)
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE: each leading "." walks one level up doc_path
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        Log.error(
                            "{{frag|quote}} reaches up past the root document",
                            frag=frag)
                    new_value = get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                # fragment is all dots: refer to the ancestor node itself
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE: resolve against the root (outermost) document
            new_value = get_attr(doc_path[-1], frag)

        # the referenced value may itself contain $ref/$concat
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            # local keys win over referenced values
            return unwrap(set_default(output, new_value))

    elif is_list(node):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    # scalars pass through untouched
    return node
def __init__(self, terms, **clauses):
    """
    A `case` expression: a non-empty list of `when` sub-clauses; only the
    final clause may carry a default.
    """
    if not is_sequence(terms):
        Log.error("case expression requires a list of `when` sub-clauses")
    Expression.__init__(self, terms)
    if len(terms) == 0:
        Log.error("Expecting at least one clause")
    # every clause except the last must be a bare WhenOp (no `else`)
    for clause in terms[:-1]:
        if not is_op(clause, WhenOp) or clause.els_ is not NULL:
            Log.error(
                "case expression does not allow `else` clause in `when` sub-clause"
            )
    self.whens = terms
def _jx_expression(expr, lang):
    """
    WRAP A JSON EXPRESSION WITH OBJECT REPRESENTATION

    :param expr: JSON-style expression (text, number, list, or {op: args} data),
                 or an already-built expression object
    :param lang: target language whose operator classes should be used
    """
    if is_expression(expr):
        # CONVERT TO lang
        new_op = lang[expr]
        if not new_op:
            # CAN NOT BE FOUND, TRY SOME PARTIAL EVAL
            return language[expr.get_id()].partial_eval()
        # NOTE(review): returns the original `expr`, not `new_op` — the comment
        # below suggests rebuilding in `lang` was deliberately skipped for speed
        return expr
        # return new_op(expr.args)  # THIS CAN BE DONE, BUT IT NEEDS MORE CODING, AND I WOULD EXPECT IT TO BE SLOW

    if expr is None:
        return TRUE
    elif is_text(expr):
        return Variable(expr)
    elif expr in (True, False, None) or expr == None or is_number(expr):
        return Literal(expr)
    elif expr.__class__ is Date:
        # dates are stored as their unix timestamp
        return Literal(expr.unix)
    elif is_sequence(expr):
        return lang[TupleOp([_jx_expression(e, lang) for e in expr])]

    # expr = to_data(expr)
    try:
        items = items_(expr)

        for op, term in items:
            # ONE OF THESE IS THE OPERATOR
            full_op = operators.get(op)
            if full_op:
                class_ = lang.ops[full_op.get_id()]
                if class_:
                    return class_.define(expr)

                # THIS LANGUAGE DOES NOT SUPPORT THIS OPERATOR, GOTO BASE LANGUAGE AND GET THE MACRO
                class_ = language[op.get_id()]
                output = class_.define(expr).partial_eval()
                return _jx_expression(output, lang)
        else:
            # for/else: no key matched a known operator
            if not items:
                return NULL

        # NOTE(review): Log.error raises on its own; the `raise` here only
        # guards against a misconfigured logger returning normally
        raise Log.error("{{instruction|json}} is not known", instruction=expr)

    except Exception as e:
        Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e)
def __init__(self, args):
    """
    Validate constructor arguments: a list of expressions, a
    {Variable: Literal} mapping, None, or a single expression.
    """
    self.simplified = False
    # SOME BASIC VERIFICATION THAT THESE ARE REASONABLE PARAMETERS
    if is_sequence(args):
        not_expressions = [t for t in args if t != None and not is_expression(t)]
        if not_expressions:
            Log.error("Expecting an expression, not {{bad}}", bad=not_expressions)
    elif is_data(args):
        ok = all(
            is_op(k, Variable) and is_literal(v)
            for k, v in args.items()
        )
        if not ok:
            Log.error("Expecting an {<variable>: <literal>}")
    elif args == None:
        pass
    elif not is_expression(args):
        Log.error("Expecting an expression")
def get_type(v):
    """Classify a python value into a jx type constant; None when untypable."""
    if v == None:
        return None
    # bool must precede the int check: bool is a subclass of int
    if isinstance(v, bool):
        return BOOLEAN
    if is_text(v):
        return STRING
    if is_data(v):
        return OBJECT
    if isinstance(v, float):
        # NaN and infinities carry no usable type
        if isnan(v) or abs(v) == POS_INF:
            return None
        return NUMBER
    if isinstance(v, (int, Date)):
        return NUMBER
    if is_sequence(v):
        return NESTED
    return None
def replacer(found):
    """
    Expand one {{path|func1|func2(...)}} template match using the
    enclosing `seq` scope stack.

    :param found: regex match whose group(1) is the pipe-separated spec
    :return: expanded text, or an error placeholder on failure
    """
    ops = found.group(1).split("|")
    path = ops[0]
    var = path.lstrip(".")
    # leading dots select how far up the scope stack to look
    depth = min(len(seq), max(1, len(path) - len(var)))
    try:
        val = seq[-depth]
        if var:
            if is_sequence(val) and float(var) == _round(float(var), 0):
                val = val[int(var)]
            else:
                val = val[var]
        for func_name in ops[1:]:
            parts = func_name.split("(")
            if len(parts) > 1:
                # SECURITY NOTE: eval of template-supplied text; only safe
                # on trusted templates
                val = eval(parts[0] + "(val, " + "(".join(parts[1::]))
            else:
                val = FORMATTERS[func_name](val)
        val = toString(val)
        return val
    except Exception as e:
        from mo_logs import Except

        e = Except.wrap(e)
        try:
            # BUGFIX: str.find returns -1 (truthy) when absent and 0 (falsy)
            # at position 0, so the old truthiness test was effectively
            # inverted; use `in` to test for the substring
            if "is not JSON serializable" in e.message:
                # WORK HARDER
                val = toString(val)
                return val
        except Exception as f:
            if not _Log:
                _late_import()
            _Log.warning(
                "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                template_=template,
                cause=e,
            )
        return "[template expansion error: (" + str(e.message) + ")]"
def replacer(found):
    """
    Expand one {{path|func1|func2(...)}} template match using the
    enclosing `seq` scope stack.

    :param found: regex match whose group(1) is the pipe-separated spec
    :return: expanded text, or an error placeholder on failure
    """
    ops = found.group(1).split("|")
    path = ops[0]
    var = path.lstrip(".")
    # leading dots select how far up the scope stack to look
    depth = min(len(seq), max(1, len(path) - len(var)))
    try:
        val = seq[-depth]
        if var:
            if is_sequence(val) and float(var) == _round(float(var), 0):
                val = val[int(var)]
            else:
                val = val[var]
        for func_name in ops[1:]:
            parts = func_name.split('(')
            if len(parts) > 1:
                # SECURITY NOTE: eval of template-supplied text; only safe
                # on trusted templates
                val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
            else:
                val = FORMATTERS[func_name](val)
        val = toString(val)
        return val
    except Exception as e:
        from mo_logs import Except

        e = Except.wrap(e)
        try:
            # BUGFIX: str.find returns -1 (truthy) when absent and 0 (falsy)
            # at position 0, so the old truthiness test was effectively
            # inverted; use `in` to test for the substring
            if "is not JSON serializable" in e.message:
                # WORK HARDER
                val = toString(val)
                return val
        except Exception as f:
            if not _Log:
                _late_import()
            _Log.warning(
                "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                template_=template,
                cause=e
            )
        return "[template expansion error: (" + str(e.message) + ")]"
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if is_data(value):
        if depth == 0:
            # depth budget exhausted: serialize the mapping to truncated JSON
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        flattened = {}
        for name, child in value.items():
            flattened[name] = _deep_json_to_string(child, depth - 1)
        return flattened
    if is_sequence(value):
        # sequences always become (truncated) JSON text
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    return strings.limit(value2json(value), LOG_STRING_LENGTH)
def _test_contains(self, key):
    """
    True when `key` — the full key, or a prefix of the key columns — is
    present in this index.
    """
    try:
        if is_sequence(key) and len(key) < len(self._keys):
            # partial key: match only the leading columns
            depth = len(key)
            parts = value2key(self._keys[0:depth:], key)[:depth]
        else:
            parts = value2key(self._keys, key)

        # walk the nested-dict index one key part at a time
        node = self._data
        for part in parts:
            try:
                node = node[part]
            except Exception:
                return False
        return True
    except Exception as e:
        Log.error("something went wrong", e)
def request(method, url, headers=None, data=None, json=None, zip=None, retry=None, timeout=None, session=None, kwargs=None):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None
    :param method: GET, POST, etc
    :param url: URL
    :param headers: dict OF HTTP REQUEST HEADERS
    :param data: BYTES (OR GENERATOR OF BYTES)
    :param json: JSON-SERIALIZABLE STRUCTURE
    :param zip: ZIP THE REQUEST BODY, IF BIG ENOUGH
    :param retry: {"times": x, "sleep": y} STRUCTURE
    :param timeout: SECONDS TO WAIT FOR RESPONSE
    :param session: Session OBJECT, IF YOU HAVE ONE
    :param kwargs: ALL PARAMETERS (DO NOT USE)
    :return:
    """
    global _warning_sent
    global request_count

    # one-time nag when the module-wide default headers were never configured
    if not _warning_sent and not default_headers:
        Log.warning(
            text(
                "The mo_http.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' " +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
                "function to set `mo_http.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS: first acceptable (non-4xx/5xx) response wins
        failures = []
        for remaining, u in countdown(url):
            try:
                response = request(url=u, kwargs=kwargs)
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    # last candidate: return whatever we got
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    # when no session was supplied, create one and close it on exit;
    # a caller-supplied session is left open (closing(Null) is a no-op)
    if session:
        close_after_response = Null
    else:
        close_after_response = session = sessions.Session()

    with closing(close_after_response):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, DEFAULTS)

            # HEADERS: caller's, then session's, then module defaults
            headers = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)

            # RETRY: normalize None / bare number / Duration sleep into
            # a {"times": x, "sleep": y} structure
            retry = wrap(retry)
            if retry == None:
                retry = set_default({}, DEFAULTS['retry'])
            elif isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            elif isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds

            # JSON: serialize and use as the request body
            if json != None:
                data = value2json(json).encode('utf8')

            # ZIP
            zip = coalesce(zip, DEFAULTS['zip'])
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip:
                if is_sequence(data):
                    # generator of bytes: compress streamingly
                    compressed = ibytes2icompressed(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
                elif len(coalesce(data)) > 1000:
                    # only bother compressing non-trivial bodies
                    compressed = bytes2zip(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # sleep between attempts, not before the first
                Till(seconds=retry.sleep).wait()

            try:
                request_count += 1
                with Timer("http {{method|upper}} to {{url}}", param={
                        "method": method,
                        "url": text(url)
                }, verbose=DEBUG):
                    return _session_request(session,
                                            url=str(url),
                                            headers=headers,
                                            data=data,
                                            json=None,
                                            kwargs=kwargs)
            except Exception as e:
                e = Except.wrap(e)
                # downgrade https->http on a known SSL failure mode, then retry
                if retry['http'] and str(url).startswith(
                        "https://"
                ) and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note(
                        "Changed {{url}} to http due to SSL EOF violation.",
                        url=str(url))
                errors.append(e)

        # NOTE(review): errors[0] raises IndexError when retry.times == 0,
        # and the timeout message is missing its closing paren — confirm intent
        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=timeout,
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])