def _convert_edge(self, edge):
    # Normalize one query "edge" into a Data record with name, value, and domain.
    if is_text(edge):
        # simple edge: the name and the value are the same field, default domain
        return Data(
            name=edge,
            value=edge,
            domain=self._convert_domain()
        )
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            # a non-text value has no natural name; caller must supply one
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        if edge.value.__class__ in (Data, dict, list, FlatList) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            # a structured value with no explicit domain means: build a default
            # domain whose dimension is the given set of fields
            domain = self._convert_domain()
            domain.dimension = Data(fields=edge.value)

            return Data(
                name=edge.name,
                allowNulls=False if edge.allowNulls is False else True,  # default True
                domain=domain
            )

        domain = self._convert_domain(edge.domain)
        return Data(
            name=coalesce(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=False if edge.allowNulls is False else True,  # default True
            domain=domain
        )
def parse(*args):
    """
    Best-effort conversion of the argument(s) to a Date.

    Accepts: datetime/date, Date, a number (unix seconds or milliseconds),
    digit-strings of plausible timestamp length, other text (delegated to
    unicode2Date, with optional format args), or datetime() constructor args.
    Raises (via Log.error) when conversion fails.
    """
    def _from_unix(seconds):
        # The same seconds-vs-milliseconds disambiguation was duplicated in
        # two branches; factored here.  Anything beyond 9999999999 (year
        # ~2286) cannot be a unix-seconds timestamp, so treat as millis.
        if seconds > 9999999999:  # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
            return _unix2Date(seconds / 1000)
        return _unix2Date(seconds)

    try:
        if len(args) == 1:
            a0 = args[0]
            if isinstance(a0, (datetime, date)):
                output = _unix2Date(datetime2unix(a0))
            elif isinstance(a0, Date):
                output = _unix2Date(a0.unix)
            elif isinstance(a0, (int, long, float, Decimal)):
                output = _from_unix(float(a0))
            elif is_text(a0) and len(a0) in [9, 10, 12, 13] and mo_math.is_integer(a0):
                # digit-string of timestamp-like length: treat as unix seconds/millis
                output = _from_unix(float(a0))
            elif is_text(a0):
                output = unicode2Date(a0)
            else:
                output = _unix2Date(datetime2unix(datetime(*args)))
        else:
            if is_text(args[0]):
                # (text, format) pair(s)
                output = unicode2Date(*args)
            else:
                # datetime constructor arguments
                output = _unix2Date(datetime2unix(datetime(*args)))
        return output
    except Exception as e:
        from mo_logs import Log
        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def _select_a_field(field):
    # Normalize a select clause to {"name": ..., "value": [path, parts]} form.
    if is_text(field):
        return wrap({"name": field, "value": split_field(field)})

    wrapped = wrap(field)
    if is_text(wrapped.value):
        return wrap({"name": wrapped.name, "value": split_field(wrapped.value)})

    # value is already structured (e.g. a callable or a pre-split path)
    return wrap({"name": field.name, "value": field.value})
def _normalize_group(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized groupby
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param limit: domain size limit for plain-name groupbys
    :param schema: for context
    :return: a normalized groupby (wrapped list of clause dicts)
    """
    if is_text(edge):
        if edge.endswith(".*"):
            # "prefix.*" means group by every leaf under the prefix
            prefix = edge[:-2]
            if schema:
                output = wrap([
                    {
                        # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
                        "name": concat_field(prefix, literal_field(relative_field(untype_path(c.name), prefix))),
                        "put": {"name": literal_field(untype_path(c.name))},
                        "value": jx_expression(c.es_column, schema=schema),
                        "allowNulls": True,
                        "domain": {"type": "default"}
                    }
                    for c in schema.leaves(prefix)
                ])
                return output
            else:
                # no schema: defer the leaf expansion via LeavesOp
                return wrap([{
                    "name": untype_path(prefix),
                    "put": {"name": literal_field(untype_path(prefix))},
                    "value": LeavesOp(Variable(prefix)),
                    "allowNulls": True,
                    "dim": dim_index,
                    "domain": {"type": "default"}
                }])
        # plain column name
        return wrap([{
            "name": edge,
            "value": jx_expression(edge, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": Domain(type="default", limit=limit)
        }])
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")

        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])
def _to_ascii_dict(headers):
    # Mutate `headers` IN PLACE so every text key/value becomes ascii-encoded
    # bytes (Python 2 http libraries want byte headers).  Iterates over a copy
    # because the dict is modified during the loop.
    if headers is None:
        return
    for k, v in copy(headers).items():
        if is_text(k):
            del headers[k]  # re-insert under the encoded key
            if is_text(v):
                headers[k.encode('ascii')] = v.encode('ascii')
            else:
                headers[k.encode('ascii')] = v
        elif is_text(v):
            headers[k] = v.encode('ascii')
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS

    Normalize a select clause (text or mapping) into a Data record with
    name, value (an expression), aggregate, and default.  Returns Null when
    the clause is empty/unusable.
    """
    if not _Column:
        _late_import()

    if is_text(select):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        # no value: select the whole document "."
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        else:
            return Null
    elif is_text(select.value):
        if select.value.endswith(".*"):
            # "prefix.*" expands to the leaves under prefix
            name = select.value[:-2].lstrip(".")
            output.name = coalesce(select.name, name)
            output.value = LeavesOp(Variable(name), prefix=coalesce(select.prefix, name))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value, schema=schema)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp(Variable("."))
            else:
                output.name = coalesce(select.name, select.value.lstrip("."), select.aggregate)
                output.value = jx_expression(select.value, schema=schema)
    elif is_number(output.value):
        # constant select
        if not output.name:
            output.name = text_type(output.value)
        output.value = jx_expression(select.value, schema=schema)
    else:
        output.value = jx_expression(select.value, schema=schema)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    output.default = coalesce(select.default, canonical_aggregates[output.aggregate].default)
    return output
def _convert_from(self, frum):
    # Normalize the "from" clause: a table name becomes Data(name=...),
    # containers and variables pass through untouched.
    if is_text(frum):
        return Data(name=frum)
    if is_op(frum, (Container, Variable)):
        return frum
    Log.error("Expecting from clause to be a name, or a container")
def wrap_function(func):
    """
    RETURN A THREE-PARAMETER WINDOW FUNCTION TO MATCH

    Accepts a text expression (compiled), or a callable of 0..3 parameters,
    and adapts it to the (row, rownum, rows) signature.
    """
    if is_text(func):
        return compile_expression(func)

    numarg = func.__code__.co_argcount
    if numarg == 0:
        def temp(row, rownum, rows):
            return func()
        return temp
    elif numarg == 1:
        def temp(row, rownum, rows):
            return func(row)
        return temp
    elif numarg == 2:
        def temp(row, rownum, rows):
            return func(row, rownum)
        return temp
    elif numarg == 3:
        # already has the right arity
        return func
    # NOTE(review): a callable with more than 3 parameters falls through and
    # this function implicitly returns None — confirm that is intended
def _scrub(result):
    # Convert a pyparsing result tree into plain JSON-compatible data:
    # text stays text, bytes become utf8 text, sequences collapse, and
    # single-element lists unwrap.
    if is_text(result):
        return result
    elif is_binary(result):
        return result.decode('utf8')
    elif isinstance(result, number_types):
        return result
    elif not result:
        # any falsy value (None, empty list/dict/ParseResults) becomes {}
        return {}
    elif isinstance(result, (list, ParseResults)):
        if not result:
            # NOTE(review): unreachable — empty sequences were already caught
            # by the `not result` branch above
            return None
        elif len(result) == 1:
            return _scrub(result[0])
        else:
            output = [rr for r in result for rr in [_scrub(r)] if rr != None]

            # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
            if all(is_data(r) and "literal" in r.keys() for r in output):
                output = {"literal": [r['literal'] for r in output]}
            return output
    elif not list(result.items()):
        return {}
    else:
        # mapping: scrub values, dropping the Nones
        return {
            k: vv
            for k, v in result.items()
            for vv in [_scrub(v)]
            if vv != None
        }
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES

    NOTE: intentionally shadows the builtin `tuple` — this is a query
    operator over `data` selecting `field_name` (text, {"value":...},
    or a list of selects).
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")
    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        if len(split_field(field_name)) == 1:
            # shallow field: direct lookup per row
            return [(d[field_name], ) for d in data]
        else:
            # deep (dotted) field: delegate to the recursive helper
            path = split_field(field_name)
            output = []
            flat_list._tuple1(data, path, 0, output)
            return output
    elif is_list(field_name):
        paths = [_select_a_field(f) for f in field_name]
        output = FlatList()
        _tuple((), unwrap(data), paths, 0, output)
        return output
    else:
        paths = [_select_a_field(field_name)]
        output = FlatList()
        _tuple((), data, paths, 0, output)
        return output
def string2url(value):
    # URL-encode by mapping each character (text is first taken to latin-1)
    # or each raw byte through the _map2url table.
    if is_text(value):
        return "".join(_map2url[c] for c in unicode2latin1(value))
    if is_binary(value):
        return "".join(_map2url[c] for c in value)
    Log.error("Expecting a string")
def __init__(self, stream):
    # Log writer that sends lines to a stream (or a dotted stream name such
    # as "sys.stdout") through a background thread, since stream writes can
    # be slow.
    assert stream

    if is_text(stream):
        name = stream
        # SECURITY NOTE(review): eval of configuration text to resolve a
        # dotted name — only safe when the config source is trusted
        stream = self.stream = eval(stream)
        if name.startswith("sys.") and PY3:
            # PY3 sys streams want text; incoming data is utf8 bytes
            self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
    else:
        name = "stream"
        self.stream = stream

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from mo_threads import Queue

    def utf8_appender(value):
        # ensure bytes before writing
        if is_text(value):
            value = value.encode('utf8')
        self.stream.write(value)

    appender = utf8_appender

    self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
    self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    Recursively rewrite `expr`, replacing known dimension names with their
    definitions; literals pass through unchanged.
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # known dimension name -> its definition, else keep the name
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            # a nested query
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return dict_to_data({
                name: self.convert(value)
                for name, value in expr.leaves()
            })
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        return list_to_data([self.convert(value) for value in expr])
    else:
        return expr
def __new__(cls, value=None, **kwargs):
    # Build a Duration from: timedelta kwargs, a number of seconds, a text
    # spec (delegated to parse), or another Duration.  Returns None for
    # no-args and NaN inputs.
    output = object.__new__(cls)
    if value == None:
        if kwargs:
            output.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
            output.month = 0
            return output
        else:
            return None

    if is_number(value):
        # NOTE(review): this branch writes `_milli` directly while the others
        # write `milli` — presumably `milli` is a property backed by `_milli`;
        # confirm against the class definition
        output._milli = float(value) * 1000
        output.month = 0
        return output
    elif is_text(value):
        return parse(value)
    elif isinstance(value, Duration):
        output.milli = value.milli
        output.month = value.month
        return output
    elif isinstance(value, float) and is_nan(value):
        return None
    else:
        from mo_logs import Log
        # NOTE(review): message reads ")of to make" — looks like a typo in the
        # error text
        Log.error("Do not know type of object (" + get_module("mo_json").value2json(value) + ")of to make a Duration")
def __init__(self, message="ping", every="second", start=None, until=None):
    # Periodic announcer: emit `message` every `every`, stopping on `until`
    # (a Signal, a timestamp, or never when None).
    self.message = show_message(message) if is_text(message) else message
    self.every = Duration(every)

    if isinstance(until, Signal):
        self.please_stop = until
    else:
        self.please_stop = Signal() if until == None else Till(Duration(until).seconds)

    self.thread = None
    if start:
        self.thread = Thread.run(
            "repeat",
            _repeat,
            self.message,
            self.every,
            Date(start),
            parent_thread=MAIN_THREAD,
            please_stop=self.please_stop,
        ).release()
def __new__(cls, value=None, **kwargs):
    # Build a Duration from: timedelta kwargs, a number of seconds, a text
    # spec (delegated to parse), or another Duration.  Returns None for
    # no-args and NaN inputs.
    output = object.__new__(cls)
    if value == None:
        if kwargs:
            output.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
            output.month = 0
            return output
        else:
            return None

    if is_number(value):
        # NOTE(review): writes `_milli` directly here but `milli` elsewhere —
        # presumably `milli` is a property over `_milli`; confirm
        output._milli = float(value) * 1000
        output.month = 0
        return output
    elif is_text(value):
        return parse(value)
    elif isinstance(value, Duration):
        output.milli = value.milli
        output.month = value.month
        return output
    elif isinstance(value, float) and is_nan(value):
        return None
    else:
        from mo_logs import Log
        # NOTE(review): ")of to make" reads like a typo in the error text
        Log.error("Do not know type of object (" + get_module("mo_json").value2json(value) + ")of to make a Duration")
def __init__(self, *values):
    """
    :param values: Dot-delimited API names; every entry must be text
    """
    for v in values:
        if not is_text(v):
            Log.error("expecting strings")
    self.values = values
def latin12unicode(value):
    # Decode latin-1 bytes into unicode; reject input that is already text.
    if is_text(value):
        Log.error("can not convert unicode from latin1")
    try:
        decoded = value.decode('latin1')
        return text_type(decoded)
    except Exception as cause:
        Log.error("Can not convert {{value|quote}} to unicode", value=value)
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    Recursively rewrite `expr`, replacing known dimension names with their
    definitions; literals pass through unchanged.
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # known dimension name -> its definition, else keep the name
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            # a nested query
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def quote_value(value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    # branch order matters: Date/datetime before the generic number/iterable
    # checks, since those types may also satisfy them
    try:
        if value == None:
            return SQL_NULL
        elif isinstance(value, SQL):
            # already SQL: pass through untouched
            return value
        elif is_text(value):
            return SQL("'" + "".join(ESCAPE_DCT.get(c, c) for c in value) + "'")
        elif is_data(value):
            # mappings are stored as JSON text
            return quote_value(json_encode(value))
        elif isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        elif isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        elif is_number(value):
            return SQL(text(value))
        elif hasattr(value, '__iter__'):
            # other iterables also serialize as JSON
            return quote_value(json_encode(value))
        else:
            # last resort: quote the text form
            return quote_value(text(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def __radd__(self, other):
    # Prepend `other` to this SQL.  Plain text is accepted only when it
    # contains no quote characters (cheap injection guard).
    if isinstance(other, SQL):
        return SQL(other.sql + self.sql)
    if is_text(other) and all(q not in other for q in ('"', '\'', '`')):
        return SQL(other + self.sql)
    Log.error("Can only concat other SQL")
def __init__(self, script, data_type=OBJECT):
    # Wrap a pre-built script string as an Expression; the script is taken
    # as-is, so it is marked simplified.
    Expression.__init__(self, None)
    if not is_text(script):
        Log.error("expecting text of a script")
    self.script = script
    self.data_type = data_type
    self.simplified = True
def note(template, **params):
    # Module-level log entry: render `template` with `params`; when tracing
    # is on, also capture the caller's file/line/method.
    if not is_text(template):
        Log.error("Log.note was expecting a unicode template")
    if len(template) > 10000:
        # cap runaway templates
        template = template[:10000]

    log_params = wrap({
        "template": template,
        "params": params,
        "timestamp": datetime.utcnow(),
        "machine": machine_metadata,
        "context": exceptions.NOTE,
        "thread": Thread.current()
    })

    if not template.startswith("\n") and template.find("\n") > -1:
        # multi-line messages start on their own line
        template = "\n" + template

    if Log.trace:
        log_template = (
            '{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - "{{location.file}}:{{location.line}}" ({{location.method}}) - '
            + template.replace("{{", "{{params.")
        )
        # caller is one frame up
        f = sys._getframe(1)
        log_params.location = {
            "line": f.f_lineno,
            "file": text(f.f_code.co_filename.split(os.sep)[-1]),
            "method": text(f.f_code.co_name),
        }
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    Log.main_log.write(log_template, log_params)
def latin12unicode(value):
    # Only byte strings are acceptable; text is already unicode.
    if is_text(value):
        Log.error("can not convert unicode from latin1")
    try:
        raw = value.decode('latin1')
        return text(raw)
    except Exception:
        Log.error("Can not convert {{value|quote}} to unicode", value=value)
def __radd__(self, other):
    # Prepend `other`; in DEBUG mode, quote-free plain text is also allowed.
    if isinstance(other, SQL):
        return ConcatSQL((other, self))
    if is_text(other) and DEBUG and all(q not in other for q in ('"', "'", "`")):
        return ConcatSQL((SQL(other), self))
    Log.error("Can only concat other SQL")
def note(cls, template, default_params={}, stack_depth=0, log_context=None, **more_params):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # timestamp captured before any processing so it reflects call time
    timestamp = datetime.utcnow()
    if not is_text(template):
        Log.error("Log.note was expecting a unicode template")

    # default_params is never mutated (merged via dict()), so the mutable
    # default argument is safe here
    Log._annotate(
        LogItem(
            context=exceptions.NOTE,
            format=template,
            template=template,
            params=dict(default_params, **more_params),
        ),
        timestamp,
        stack_depth + 1,  # +1 to skip this wrapper frame
    )
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    json_string = text(json_string)
    if not is_text(json_string) and json_string.__class__.__name__ != "FileString":
        Log.error("only unicode json accepted")

    try:
        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        if flexible:
            # hjson allows comments and relaxed syntax
            value = hjson2value(json_string)
        else:
            value = to_data(json_decoder(text(json_string)))

        if leaves:
            value = leaves_to_data(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        # walk down the cause chain to the innermost decoder error
        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            # build a small context window with a caret pointing at the error
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error(CAN_NOT_DECODE_JSON + " at:\n\t{{sample}}\n\t{{pointer}}\n", sample=sample, pointer=pointer)

        # fall back to a hex dump of the first 1000 characters
        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            char_str = " " + " ".join((c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception:
            char_str = " "
        Log.error(CAN_NOT_DECODE_JSON + ":\n{{char_str}}\n{{hexx_str}}\n", char_str=char_str, hexx_str=hexx_str, cause=e)
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES

    NOTE: intentionally shadows the builtin `tuple` — this is a query
    operator over `data` selecting `field_name` (text, {"value":...},
    or a list of selects).
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")
    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        if len(split_field(field_name)) == 1:
            # shallow field: direct lookup per row
            return [(d[field_name],) for d in data]
        else:
            # deep (dotted) field: delegate to the recursive helper
            path = split_field(field_name)
            output = []
            flat_list._tuple1(data, path, 0, output)
            return output
    elif is_list(field_name):
        paths = [_select_a_field(f) for f in field_name]
        output = FlatList()
        _tuple((), unwrap(data), paths, 0, output)
        return output
    else:
        paths = [_select_a_field(field_name)]
        output = FlatList()
        _tuple((), data, paths, 0, output)
        return output
def write(self, content):
    """
    :param content: text, or iterable of text
    :return:
    """
    if not self.parent.exists:
        self.parent.create()
    with open(self._filename, "wb") as f:
        if is_list(content) and self.key:
            Log.error(u"list of data and keys are not supported, encrypt before sending to file")

        # normalize content to an iterable of text
        if is_list(content):
            pass
        elif isinstance(content, text):
            content = [content]
        elif hasattr(content, "__iter__"):
            pass

        for d in content:
            if not is_text(d):
                Log.error(u"Expecting unicode data only")
            if self.key:
                # encrypt each chunk before writing
                from mo_math.aes_crypto import encrypt
                f.write(encrypt(d, self.key).encode("utf8"))
            else:
                f.write(d.encode("utf8"))
def note(template, **params):
    # Module-level log entry: render `template` with `params`; when tracing
    # is on, also capture the caller's file/line/method.
    if not is_text(template):
        Log.error("Log.note was expecting a unicode template")
    if len(template) > 10000:
        # cap runaway templates
        template = template[:10000]

    log_params = wrap(
        {
            "template": template,
            "params": params,
            "timestamp": datetime.utcnow(),
            "machine": machine_metadata,
            "context": exceptions.NOTE,
        }
    )

    if not template.startswith("\n") and template.find("\n") > -1:
        # multi-line messages start on their own line
        template = "\n" + template

    if Log.trace:
        log_template = (
            '{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - "{{location.file}}:{{location.line}}" ({{location.method}}) - '
            + template.replace("{{", "{{params.")
        )
        # caller is one frame up
        f = sys._getframe(1)
        log_params.location = {
            "line": f.f_lineno,
            "file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
            "method": text_type(f.f_code.co_name),
        }
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    Log.main_log.write(log_template, log_params)
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE

    Recursively expand `template` (text, mapping, or list) against the data
    path in `seq`; returns the expanded text.
    """
    if is_text(template):
        return _simple_expand(template, seq)
    elif is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = wrap(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        data = seq[-1][template["from"]]
        output = []
        for d in data:
            # extend the path with this item, then recurse
            s = seq + (d,)
            output.append(_expand(template.template, s))
        return coalesce(template.separator, "").join(output)
    elif is_list(template):
        return "".join(_expand(t, seq) for t in template)
    else:
        if not _Log:
            _late_import()
        _Log.error("can not handle")
def quote_value(value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    # branch order matters: number is tested before datetime/iterable here
    try:
        if value == None:
            return SQL_NULL
        elif isinstance(value, SQL):
            # parameterized SQL is expanded first
            return quote_sql(value.template, value.param)
        elif is_text(value):
            return SQL("'" + "".join(ESCAPE_DCT.get(c, c) for c in value) + "'")
        elif is_data(value):
            # mappings are stored as JSON text
            return quote_value(json_encode(value))
        elif is_number(value):
            return SQL(text_type(value))
        elif isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        elif isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        elif hasattr(value, '__iter__'):
            # other iterables also serialize as JSON
            return quote_value(json_encode(value))
        else:
            # last resort: quote the text form
            return quote_value(text_type(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE

    All emitted strings are truncated to LOG_STRING_LENGTH.
    """
    if is_data(value):
        if depth == 0:
            # depth exhausted: stringify the remaining structure
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {
            k: _deep_json_to_string(v, depth - 1)
            for k, v in value.items()
        }
    elif is_sequence(value):
        # sequences are always stringified, regardless of depth
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    elif isinstance(value, number_types):
        return value
    elif is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    elif is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    elif isinstance(value, (date, datetime)):
        return datetime2unix(value)
    else:
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
def note(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # timestamp captured before any processing so it reflects call time
    timestamp = datetime.utcnow()
    if not is_text(template):
        Log.error("Log.note was expecting a unicode template")

    # default_params is never mutated (merged via dict()), so the mutable
    # default argument is safe here
    Log._annotate(
        LogItem(
            context=exceptions.NOTE,
            format=template,
            template=template,
            params=dict(default_params, **more_params)
        ),
        timestamp,
        stack_depth+1  # +1 to skip this wrapper frame
    )
def output(value):
    # Apply func to scalars; recurse element-wise into non-text iterables.
    if hasattr(value, "__iter__") and not is_text(value):
        return [output(item) for item in value]
    return func(value)
def _dict2json(value, sub_schema, path, net_new_properties, buffer):
    # Typed-JSON encode a dict into `buffer`, registering any previously
    # unseen properties in sub_schema / net_new_properties.  Always appends
    # an EXISTS marker so empty objects are distinguishable.
    prefix = '{'
    for k, v in sort_using_key(value.items(), lambda r: r[0]):
        if v == None or v == '':
            # nulls and empty strings are not emitted
            continue
        append(buffer, prefix)
        prefix = COMMA
        if is_binary(k):
            k = k.decode('utf8')
        if not is_text(k):
            Log.error("Expecting property name to be a string")
        if k not in sub_schema:
            # record schema growth
            sub_schema[k] = {}
            net_new_properties.append(path + [k])
        append(buffer, encode_basestring(encode_property(k)))
        append(buffer, COLON)
        typed_encode(v, sub_schema[k], path + [k], net_new_properties, buffer)
    if prefix is COMMA:
        # at least one property was written
        append(buffer, COMMA)
        append(buffer, QUOTED_EXISTS_TYPE)
        append(buffer, '1}')
    else:
        # empty object: still mark existence
        append(buffer, '{')
        append(buffer, QUOTED_EXISTS_TYPE)
        append(buffer, '1}')
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE

    Recursively expand `template` (text, mapping, or list) against the data
    path in `seq`; returns the expanded text.
    """
    if is_text(template):
        return _simple_expand(template, seq)
    elif is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = wrap(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        data = seq[-1][template["from"]]
        output = []
        for d in data:
            # extend the path with this item, then recurse
            s = seq + (d, )
            output.append(_expand(template.template, s))
        return coalesce(template.separator, "").join(output)
    elif is_list(template):
        return "".join(_expand(t, seq) for t in template)
    else:
        if not _Log:
            _late_import()
        _Log.error("can not handle")
def get_stats(query):
    # Collect one record per column referenced by the query, tagged with the
    # table, timestamp, and the clause ("mode") the column appears in.
    frum = query.frum
    if isinstance(frum, Table):
        vars_record = {"table": frum.name}
    elif is_text(frum):
        vars_record = {"table": frum}
    else:
        # NOTE(review): the recursive call returns a list, yet the code below
        # assigns vars_record['timestamp'] — confirm the sub-query case works
        vars_record = get_stats(frum)
    now = Date.now()
    vars_record['timestamp'] = now

    output = []
    for clause in ["select", "edges", "groupby", "window", "sort"]:
        vars_record["mode"] = clause
        for expr in listwrap(getattr(query, clause)):
            if isinstance(expr.value, Expression):
                for v in expr.value.vars():
                    output.append(set_default({"column": v.var}, vars_record))

    # the where clause is handled separately since it is a single expression
    for v in query.where.vars():
        output.append(set_default({"column": v.var, "mode": "where"}, vars_record))

    return output
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    Each output entry is {"value": <expression>, "sort": <+1/0/-1>}.
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s):
            # bare column name, ascending
            output.append({"value": jx_expression(s), "sort": 1})
        elif is_expression(s):
            output.append({"value": s, "sort": 1})
        elif mo_math.is_integer(s):
            # integer is a column offset
            output.append({"value": jx_expression({"offset": s}), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            # {column: direction, ...} shorthand
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": sort_direction[s.sort]
            })
    return output
def _dict2json(value, sub_schema, path, net_new_properties, buffer):
    # Typed-JSON encode a dict into `buffer`, registering any previously
    # unseen properties in sub_schema / net_new_properties.  Always appends
    # an EXISTS marker so empty objects are distinguishable.
    prefix = '{'
    for k, v in sort_using_key(value.items(), lambda r: r[0]):
        if v == None or v == '':
            # nulls and empty strings are not emitted
            continue
        append(buffer, prefix)
        prefix = COMMA
        if is_binary(k):
            k = utf82unicode(k)
        if not is_text(k):
            Log.error("Expecting property name to be a string")
        if k not in sub_schema:
            # record schema growth
            sub_schema[k] = {}
            net_new_properties.append(path + [k])
        append(buffer, encode_basestring(encode_property(k)))
        append(buffer, COLON)
        typed_encode(v, sub_schema[k], path + [k], net_new_properties, buffer)
    if prefix is COMMA:
        # at least one property was written
        append(buffer, COMMA)
        append(buffer, QUOTED_EXISTS_TYPE)
        append(buffer, '1}')
    else:
        # empty object: still mark existence
        append(buffer, '{')
        append(buffer, QUOTED_EXISTS_TYPE)
        append(buffer, '1}')
def command_loop(local):
    # REPL-style worker loop: read one JSON command per line from STDIN,
    # execute it, and answer with one JSON line on STDOUT.
    # SECURITY: commands are run via exec() — only safe when the parent
    # process is fully trusted.
    STDOUT.write(b'{"out":"ok"}\n')
    DEBUG and Log.note("python process running")

    file = File  # exposed for the exec'd commands
    while not please_stop:
        line = STDIN.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                dummy = {}
                if is_text(command['import']):
                    # "import": "module" -> star import into context
                    exec ("from " + command['import'] + " import *", dummy, context)
                else:
                    # "import": {"from": module, "vars": [...]}
                    exec ("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                # locals shadow the shared context
                STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))}).encode('utf8'))
                STDOUT.write(b'\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec (command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                # {name: args} -> call name(args) and return _return
                for k, v in command.items():
                    if is_list(v):
                        exec ("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec ("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local)
                STDOUT.write(value2json({"out": local['_return']}).encode('utf8'))
                STDOUT.write(b'\n')
        except Exception as e:
            e = Except.wrap(e)
            STDOUT.write(value2json({"err": e}).encode('utf8'))
            STDOUT.write(b'\n')
        finally:
            STDOUT.flush()
def __getitem__(self, item):
    # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
    # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
    # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
    # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
    if is_data(item):
        # dict of {edge_name: partition_value} -> slice those edges away
        coordinates = [None] * len(self.edges)

        # MAP DICT TO NUMERIC INDICES
        for name, v in item.items():
            ei, parts = first((i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name)
            if not parts:
                Log.error(
                    "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet",
                    name=name,
                    value=v
                )
            part = first(p for p in parts if p.value == v)
            if not part:
                # partition value not present: no data
                return Null
            else:
                coordinates[ei] = part.dataIndex

        # edges that were NOT fixed by `item` remain
        edges = [e for e, v in zip(self.edges, coordinates) if v is None]
        if not edges:
            # ZERO DIMENSIONAL VALUE
            return dict_to_data({
                k: v.__getitem__(coordinates)
                for k, v in self.data.items()
            })
        else:
            output = Cube(
                select=self.select,
                edges=list_to_data([e for e, v in zip(self.edges, coordinates) if v is None]),
                data={
                    k: Matrix(values=c.__getitem__(coordinates))
                    for k, c in self.data.items()
                }
            )
            return output
    elif is_text(item):
        # RETURN A VALUE CUBE
        if self.is_value:
            if item != self.select.name:
                Log.error("{{name}} not found in cube", name=item)
            return self

        if item not in self.select.name:
            Log.error("{{name}} not found in cube", name=item)

        output = Cube(
            select=first(s for s in self.select if s.name == item),
            edges=self.edges,
            data={item: self.data[item]}
        )
        return output
    else:
        Log.error("not implemented yet")
def _replace_ref(node, url):
    # Recursively replace {"$ref": ...} entries in `node` with the referenced
    # documents; `url` is the location of the current document, used to
    # resolve relative references.
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # fragment selects a sub-document of the loaded value
            new_value = get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # sibling properties override the referenced document
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)
        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def __radd__(self, other):
    # Prepend `other`; plain text is accepted only when type checking is
    # enabled and the text carries no quote/escape characters.
    if isinstance(other, SQL):
        return ConcatSQL(other, self)
    forbidden = ('"', "'", "`", "\\")
    if is_text(other) and ENABLE_TYPE_CHECKING and all(c not in other for c in forbidden):
        return ConcatSQL(SQL(other), self)
    Log.error("Can only concat other SQL")
def parse_hg_date(date):
    # hg dates arrive either as text, or as a (timestamp, time_zone) pair
    # where the timestamp is GMT
    if is_text(date):
        return Date(date)
    if is_sequence(date):
        timestamp = date[0]
        return Date(timestamp)
    Log.error("Can not deal with date like {{date|json}}", date=date)
def select(self, fields):
    # Select columns from this nested list; `fields` may be text,
    # {"value": ...}, or a list of select clauses.
    if is_data(fields):
        fields = fields.value

    if is_text(fields):
        # RETURN LIST OF VALUES
        if len(split_field(fields)) == 1:
            if self.path[0] == fields:
                # selecting the nested path itself
                return [d[1] for d in self.data]
            else:
                return [d[0][fields] for d in self.data]
        else:
            keys = split_field(fields)
            depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            short_key = keys[depth:]

            output = FlatList()
            _select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
            return output

    if is_list(fields):
        output = FlatList()

        # precompute one accessor per select clause
        meta = []
        for f in fields:
            if hasattr(f.value, "__call__"):
                meta.append((f.name, f.value))
            else:
                meta.append((f.name, functools.partial(lambda v, d: d[v], f.value)))

        for row in self._values():
            agg = Data()
            for name, f in meta:
                agg[name] = f(row)
            output.append(agg)
        return output

        # meta = []
        # for f in fields:
        #     keys = split_field(f.value)
        #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
        #     short_key = join_field(keys[depth:])
        #
        #     meta.append((f.name, depth, short_key))
        #
        # for row in self._data:
        #     agg = Data()
        #     for name, depth, short_key in meta:
        #         if short_key:
        #             agg[name] = row[depth][short_key]
        #         else:
        #             agg[name] = row[depth]
        #     output.append(agg)
        # return output

    Log.error("multiselect over FlatList not supported")
def _replace_ref(node, url):
    """
    Recursively replace {"$ref": <url>} entries in a JSON-like structure.

    :param node: dict / list / scalar structure to scan
    :param url: URL of the current document, used to resolve relative refs
    :return: structure with refs resolved (local fragment-only refs are left
             in place as URL objects for a later pass)
    """
    # NOTE(review): mutates `url` in place (trailing-slash strip) — assumes
    # the caller passes a fresh copy per document; confirm.
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        # PARTITION: capture the $ref, recurse into every other property
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET (fragment-only; resolved later)
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # DRILL INTO THE LOADED DOCUMENT BY FRAGMENT PATH
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            # NO SIBLING PROPERTIES: the ref's value replaces the node wholesale
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # SIBLING PROPERTIES WIN; ref's value only fills in the gaps
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)
        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def command_loop(local):
    """
    Read JSON commands from stdin, one per line, and execute them until a
    "stop" command (or please_stop) arrives.  Responses are written to STDOUT
    as JSON lines.

    SECURITY: this loop runs `exec` on whatever arrives on stdin — it is only
    safe when the parent process is trusted.

    :param local: dict used as the local namespace for exec'd code; also
                  consulted by "get" before the shared `context`
    """
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        # NOTE(review): .decode('utf8') implies stdin delivers bytes —
        # presumably Python 2, or a rebound binary stdin; confirm.
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                # IMPORT INTO THE SHARED context; dummy ABSORBS exec GLOBALS
                dummy={}
                if is_text(command['import']):
                    exec ("from " + command['import'] + " import *", dummy, context)
                else:
                    exec ("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context)
                STDOUT.write(DONE)
            elif "set" in command:
                # ASSIGN VARIABLES INTO THE SHARED context
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                # local TAKES PRECEDENCE OVER context
                STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))}))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec (command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                # ANY OTHER KEY IS A FUNCTION CALL: {name: [args]} or {name: {kwargs}}
                for k, v in command.items():
                    if is_list(v):
                        exec ("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec ("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local)
                STDOUT.write(value2json({"out": local['_return']}))
                STDOUT.write('\n')
        except Exception as e:
            # REPORT THE ERROR TO THE PARENT RATHER THAN DIE
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            # ONE RESPONSE PER COMMAND; FLUSH SO THE PARENT IS NOT LEFT WAITING
            STDOUT.flush()
def quote_value(self, value):
    """
    RENDER A PYTHON VALUE AS A QUOTED SQL LITERAL

    Lists are stored as their json encoding; text longer than 256 characters
    is truncated before quoting.
    """
    if value == None:
        return SQL_NULL
    if is_list(value):
        # SERIALIZE THE LIST, THEN QUOTE THE RESULTING json TEXT
        return self.quote_value(value2json(value))
    if is_text(value) and len(value) > 256:
        value = value[:256]
    return SQL(adapt(value))
def _convert_group(self, column):
    """
    NORMALIZE A groupby CLAUSE TO ITS FULL Data FORM

    Only the trivial "default" domain is permitted for groupby.
    """
    if is_text(column):
        return wrap({
            "name": column,
            "value": column,
            "domain": {"type": "default"}
        })

    column = wrap(column)
    has_complex_domain = column.domain and column.domain.type != "default"
    if has_complex_domain or column.allowNulls != None:
        Log.error("groupby does not accept complicated domains")
    if not column.name and not is_text(column.value):
        Log.error("You must name compound edges: {{edge}}", edge=column)
    return wrap({
        "name": coalesce(column.name, column.value),
        "value": column.value,
        "domain": {"type": "default"}
    })
def append(self, content, encoding='utf8'):
    """
    ADD A LINE TO THE END OF THIS FILE, CREATING PARENT DIRECTORIES AS NEEDED
    """
    if not self.parent.exists:
        self.parent.create()
    with open(self._filename, "ab") as handle:
        if not is_text(content):
            Log.error(u"expecting to write unicode only")
        handle.write(content.encode(encoding))
        handle.write(b"\n")
def __contains__(self, value):
    """
    True when value is text found in this exception's template or message,
    equals its context, or is contained in any of its causes.
    """
    if is_text(value) and (value in self.template or value in self.message):
        return True
    if self.context == value:
        return True
    return any(value in c for c in listwrap(self.cause))
def expand_template(template, value):
    """
    :param template: A UNICODE STRING WITH VARIABLE NAMES IN MOUSTACHES `{{.}}`
    :param value: Data HOLDING THE PARAMETER VALUES
    :return: UNICODE STRING WITH VARIABLES EXPANDED
    """
    params = (wrap(value),)
    if is_text(template):
        return _simple_expand(template, params)
    return _expand(template, params)
def flatten_causal_chain(log_item, output=None):
    """
    COLLECT log_item AND ALL ITS causes INTO ONE FLAT LIST

    :param log_item: an exception-like item (with .cause) or a plain text template
    :param output: optional accumulator list, appended in place
    :return: the accumulated list
    """
    output = output or []

    if is_text(log_item):
        output.append({"template": log_item})
        # FIX: was a bare `return` (None); now consistent with the branch below
        return output

    output.append(log_item)
    for c in listwrap(log_item.cause):
        flatten_causal_chain(c, output)
    # CAUSES ARE NOW IN THE FLAT LIST; DROP THE NESTED CHAIN
    log_item.cause = None
    return output
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    # NOT A DIMENSION; PASS THE terms FILTER THROUGH UNCHANGED
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        # NOTE(review): this returns immediately, discarding any
                        # filters already accumulated in `output` — confirm intended.
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        # COMPOUND DIMENSION: EACH VALUE MUST MATCH ALL ITS FIELDS
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        # SINGLE-FIELD DIMENSION: SIMPLE terms FILTER
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        # PARTITIONED DOMAIN: UNION OF EACH PARTITION'S es FILTER
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    # NOT A RECOGNIZED CLAUSE SHAPE; RETURN UNCHANGED
    return where