def extend(self, records):
    """
    JUST SO WE MODEL A Queue
    """
    # Index the incoming records by id; later duplicates win
    by_id = {r["id"]: r["value"] for r in records}
    unwrap(self.data).update(by_id)

    # Persist the whole data set as pretty-printed JSON
    File(self.filename).write(convert.value2json(self.data, pretty=True))

    Log.note("{{num}} documents added", num=len(by_id))
def post(sql):
    """
    Run `sql` and shape the result columns into Matrix cubes, one per select
    clause, materializing any "default" edge domains from the values seen.

    NOTE(review): reads `self`, `edges`, `select` and `query` from the
    enclosing scope — presumably a closure inside a query method; confirm.
    """
    # FIND OUT THE default DOMAIN SIZES
    result = self.db.column_query(sql)
    num_edges = len(edges)
    for e, edge in enumerate(edges):
        domain = edge.domain
        if domain.type == "default":
            # BUILD THE DOMAIN FROM THE DISTINCT VALUES ACTUALLY RETURNED
            domain.type = "set"
            parts = set(result[e])
            domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)]
            domain.map = {p: i for i, p in enumerate(parts)}
        else:
            Log.error("Do not know what to do here, yet")

    # FILL THE DATA CUBE
    # PAIR EACH EDGE'S value->index MAP WITH ITS RESULT COLUMN
    maps = [(unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)]
    cubes = FlatList()
    for c, s in enumerate(select):
        # ONE EXTRA SLOT PER EDGE WHEN NULLS ARE ALLOWED
        data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges])
        for rownum, value in enumerate(result[c + num_edges]):
            # TRANSLATE THE ROW'S EDGE VALUES INTO CUBE COORDINATES
            coord = [m[r[rownum]] for m, r in maps]
            data[coord] = value
        cubes.append(data)

    if isinstance(query.select, list):
        return cubes
    else:
        return cubes[0]
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES
    """
    # CUBES AND FLAT LISTS ARE NOT HANDLED BY THIS ROUTINE
    if isinstance(data, Cube):
        Log.error("not supported yet")
    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if isinstance(field_name, Mapping) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        path = split_field(field_name)
        if len(path) == 1:
            return [(d[field_name],) for d in data]
        deep_output = []
        flat_list._tuple1(data, path, 0, deep_output)
        return deep_output

    if isinstance(field_name, list):
        selected = FL()
        _tuple((), unwrap(data), [_select_a_field(f) for f in field_name], 0, selected)
        return selected

    selected = FL()
    _tuple((), data, [_select_a_field(field_name)], 0, selected)
    return selected
def dict2Multiset(dic):
    """Convert a {value: count} dict to a Multiset; None maps to None."""
    if dic == None:
        return None

    result = Multiset()
    # COPY SO THE Multiset DOES NOT ALIAS THE CALLER'S DICT
    result.dic = unwrap(dic).copy()
    return result
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # pausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any more parameters (which will overwrite default_params)
    :return: never returns; always raises Except
    """
    # ALLOW Log.error("msg", cause) SHORTHAND: AN EXCEPTION PASSED AS THE
    # PARAMS ARGUMENT IS TREATED AS THE CAUSE
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    # FIX: removed dead code — `add_to_trace` was hard-coded False, making
    # the `cause[0].trace.extend(trace[1:])` branch unreachable
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    Log a WARNING: fill the template, chain the cause, and write with context.

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # ALLOW Log.warning("msg", cause) SHORTHAND
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    # `values` IS RESERVED BY TEMPLATE EXPANSION
    # (idiom fix: membership test directly on the dict, not .keys())
    if "values" in more_params:
        Log.error("Can not handle a logging parameter by name `values`")

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template, params, cause, trace)
    Log.note(
        "{{error|unicode}}",
        error=e,
        log_context=set_default({"context": exceptions.WARNING}, log_context),
        stack_depth=stack_depth + 1
    )
def unexpected(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    Log an UNEXPECTED condition — an error we did not plan for.

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # ALLOW Log.unexpected("msg", cause) SHORTHAND
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    if cause and not isinstance(cause, Except):
        cause = Except(exceptions.UNEXPECTED, unicode(cause), trace=exceptions._extract_traceback(0))

    # FIX: honour stack_depth (was extract_stack(1)), consistent with
    # warning()/error() and with the documented contract above
    trace = exceptions.extract_stack(stack_depth + 1)
    e = Except(exceptions.UNEXPECTED, template, params, cause, trace)
    Log.note(
        "{{error}}",
        error=e,
        # NOTE(review): context is WARNING, not UNEXPECTED — looks deliberate
        # (unexpected events report through the warning channel); confirm
        log_context=set_default({"context": exceptions.WARNING}, log_context),
        stack_depth=stack_depth + 1
    )
def call(self, proc_name, params):
    """
    Execute stored procedure `proc_name` with `params`, flushing any
    backlogged statements first; the cursor is recycled afterward.
    """
    self._execute_backlog()
    plain_params = [unwrap(v) for v in params]
    try:
        self.cursor.callproc(proc_name, plain_params)
        # RECYCLE THE CURSOR SO SUBSEQUENT QUERIES GET A FRESH ONE
        self.cursor.close()
        self.cursor = self.db.cursor()
    except Exception as e:
        Log.error("Problem calling procedure " + proc_name, e)
def default(self, obj):
    """Coerce types the stock JSONEncoder rejects; unwrap anything else."""
    if obj == None:
        return None
    if isinstance(obj, set):
        # SETS SERIALIZE AS JSON ARRAYS
        return list(obj)
    if isinstance(obj, Decimal):
        return float(obj)
    # FALL BACK TO THE BASE ENCODER ON THE UNWRAPPED VALUE
    return json.JSONEncoder.default(self, unwrap(obj))
def es_fieldop(es, query):
    """
    Run a simple field-extraction query against ElasticSearch (es09 dialect):
    build a filtered match_all, request the selected fields, and shape the
    hits into one Matrix per select clause.
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    }
    # DEFAULT LIMIT IS LARGE, EFFECTIVELY "ALL"
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for s in select.value:
        if s == "*":
            # WILDCARD MEANS WHOLE _source DOCUMENTS; fields LIST NOT USED
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T])
        elif isinstance(s.value, list):
            # TUPLE OF FIELD VALUES, IN THE ORDER REQUESTED
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            # NOTE(review): falsy s.value is still used as the .get() key —
            # presumably the "no value" select; confirm against callers
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception, e:
                Log.error("", e)
def _replace_ref(node, url):
    """
    Recursively expand {"$ref": <url>} entries in `node`, resolving
    non-local references against `url` (the current document's location).
    Fragment-only (local) refs are left untouched for a later pass.
    """
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise _Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # A FRAGMENT SELECTS A SUB-PROPERTY OF THE LOADED DOCUMENT
            new_value = pyDots.get_attr(new_value, ref.fragment)

        if DEBUG:
            _Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        else:
            # PROPERTIES ALONGSIDE $ref OVERRIDE THE LOADED DOCUMENT'S
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            _Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def reverse(vals):
    # TODO: Test how to do this fastest
    # FILL A PRE-SIZED LIST FROM THE BACK WHILE ITERATING FORWARD
    size = len(vals)
    result = [None] * size
    pos = size
    for item in unwrap(vals):
        pos -= 1
        result[pos] = item
    return wrap(result)
def argparse(defs):
    """
    Build an ArgumentParser from a list of argument definitions and parse
    sys.argv; returns the parsed namespace as wrapped Data.

    NOTE(review): `args.name = None` appears to remove `name` from the
    keyword arguments (pyDots Data deletes on None-assignment) so it can be
    passed positionally instead — confirm against pyDots semantics.
    """
    parser = _argparse.ArgumentParser()
    for d in listwrap(defs):
        args = d.copy()
        name = args.name
        args.name = None
        # `name` MAY BE A SINGLE FLAG OR A LIST OF ALIASES
        parser.add_argument(*unwrap(listwrap(name)), **args)
    namespace = parser.parse_args()
    output = {k: getattr(namespace, k) for k in vars(namespace)}
    return wrap(output)
def __init__(self, name, data, schema=None):
    # TODO: STORE THIS LIKE A CUBE FOR FASTER ACCESS AND TRANSFORMATION
    # MATERIALIZE THE ROWS SO WE OWN AN INDEPENDENT LIST
    rows = list(unwrap(data))
    Container.__init__(self, rows, schema)
    # DERIVE A SCHEMA FROM THE ROWS WHEN THE CALLER GAVE NONE
    self._schema = get_schema_from_list(rows) if schema == None else schema
    self.name = name
    self.data = rows
    self.locker = Lock()  # JUST IN CASE YOU WANT TO DO MORE THAN ONE THING
def select(data, field_name):
    """
    return list with values from field_name
    """
    # DISPATCH ON CONTAINER TYPE FIRST
    if isinstance(data, Cube):
        return data._select(_normalize_selects(field_name))

    if isinstance(data, FL):
        return data.select(field_name)

    if isinstance(data, UniqueIndex):
        data = data._data.values()  # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING

    if isinstance(data, Mapping):
        return select_one(data, field_name)

    if isinstance(field_name, Mapping):
        field_name = wrap(field_name)
        if field_name.value in ["*", "."]:
            return data
        if field_name.value:
            # SIMPLIFY {"value":value} AS STRING
            field_name = field_name.value

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        path = split_field(field_name)
        if len(path) == 1:
            return FL([d[field_name] for d in data])
        deep_output = FL()
        flat_list._select1(data, path, 0, deep_output)
        return deep_output

    if isinstance(field_name, list):
        keys = [_select_a_field(wrap(f)) for f in field_name]
        return _select(Data(), unwrap(data), keys, 0)

    return _select(Data(), unwrap(data), [_select_a_field(field_name)], 0)
def filter(data, where):
    """
    Keep only the records of `data` that match `where`.

    :param data: Container, list, or set of records
    :param where: a function that accepts (record, rownum, rows) and returns boolean
    :return: the matching records (wrapped, for the list/set branch)
    """
    # EMPTY DATA OR A MATCH-EVERYTHING FILTER: NOTHING TO DO
    if len(data) == 0 or where == None or where == TRUE_FILTER:
        return data

    if isinstance(data, Container):
        return data.filter(where)

    if isinstance(data, (list, set)):
        temp = jx_expression_to_function(where)
        dd = wrap(data)
        return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])

    # Log.error RAISES, SO NOTHING EXECUTES PAST THIS POINT
    # FIX: removed the unreachable drill_filter fallback that followed
    Log.error("Do not know how to handle type {{type}}", type=data.__class__.__name__)
def _send_email(self):
    """
    Flush accumulated messages as a single email via AWS SES.
    On failure, log a warning and still back off before retrying.
    """
    try:
        if self.accumulation:
            conn = connect_to_region(
                self.settings.region,
                aws_access_key_id=unwrap(self.settings.aws_access_key_id),
                aws_secret_access_key=unwrap(self.settings.aws_secret_access_key),
            )
            try:
                conn.send_email(
                    source=self.settings.from_address,
                    to_addresses=listwrap(self.settings.to_address),
                    subject=self.settings.subject,
                    body="\n\n".join(self.accumulation),
                    format="text",
                )
            finally:
                # FIX: close the connection even when send_email raises
                conn.close()
        self.next_send = Date.now() + WAIT_TO_SEND_MORE
        self.accumulation = []
    except Exception as e:
        # BACK OFF EVEN ON FAILURE, SO WE DO NOT SPAM SES
        self.next_send = Date.now() + WAIT_TO_SEND_MORE
        Log.warning("Could not send", e)
def __init__(
    self,
    aws_access_key_id=None,  # CREDENTIAL
    aws_secret_access_key=None,  # CREDENTIAL
    region=None,  # NAME OF AWS REGION, REQUIRED FOR SOME BUCKETS
    settings=None
):
    """
    Open an S3 connection: region-specific when settings.region is given,
    otherwise the generic S3 endpoint.
    """
    self.settings = settings
    try:
        if settings.region:
            self.connection = boto.s3.connect_to_region(
                self.settings.region,
                aws_access_key_id=unwrap(self.settings.aws_access_key_id),
                aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
            )
        else:
            self.connection = boto.connect_s3(
                aws_access_key_id=unwrap(self.settings.aws_access_key_id),
                aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
            )
    except Exception as e:
        Log.error("Problem connecting to S3", e)
def __init__(
    self,
    name,
    region,
    aws_access_key_id=None,
    aws_secret_access_key=None,
    debug=False,
    settings=None
):
    """
    Attach to an existing SQS queue; error if the region or queue is unknown.
    """
    self.settings = settings
    self.pending = []

    # VALIDATE THE REGION BEFORE CONNECTING
    known_regions = [r.name for r in sqs.regions()]
    if settings.region not in known_regions:
        Log.error("Can not find region {{region}} in {{regions}}", region=settings.region, regions=known_regions)

    conn = sqs.connect_to_region(
        region_name=unwrap(settings.region),
        aws_access_key_id=unwrap(settings.aws_access_key_id),
        aws_secret_access_key=unwrap(settings.aws_secret_access_key),
    )
    self.queue = conn.get_queue(settings.name)
    if self.queue == None:
        Log.error("Can not find queue with name {{queue}} in region {{region}}", queue=settings.name, region=settings.region)
def sort(data, fieldnames=None, already_normalized=False):
    """
    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}
    """
    # NOTE(review): this `try` has no except clause visible in this excerpt;
    # the handler appears to live beyond this view — confirm before editing.
    try:
        if data == None:
            return Null

        if not fieldnames:
            # DEFAULT: SORT BY THE VALUES THEMSELVES (Python 2 cmp argument)
            return wrap(sorted(data, value_compare))

        if already_normalized:
            formal = fieldnames
        else:
            formal = query._normalize_sort(fieldnames)

        # ONE (accessor, direction) PAIR PER SORT CLAUSE
        funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal]

        def comparer(left, right):
            # COMPARE CLAUSE-BY-CLAUSE; FIRST NON-TIE DECIDES
            for func, sort_ in funcs:
                try:
                    result = value_compare(func(left), func(right), sort_)
                    if result != 0:
                        return result
                except Exception, e:
                    Log.error("problem with compare", e)
            return 0

        if isinstance(data, list):
            output = FL([unwrap(d) for d in sorted(data, cmp=comparer)])
        elif hasattr(data, "__iter__"):
            # GENERIC ITERABLE: MATERIALIZE BEFORE SORTING
            output = FL([unwrap(d) for d in sorted(list(data), cmp=comparer)])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
def es_aggop(es, mvel, query):
    """
    Compute edge-free aggregates using ES statistical facets: one facet per
    distinct select value, then shape the stats into a Cube.
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS
    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter": simplify_esfilter(query.where.to_esfilter())
                }
            else:
                # NOTE(review): this branch passes query.where WITHOUT
                # .to_esfilter(), unlike the Variable branch above — confirm
                # whether that is intended
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    # PICK THE REQUESTED AGGREGATE OUT OF EACH FACET'S STATS
    matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
def format_cube(T, select, query=None):
    """
    Shape tabular results into a Cube with a single "rownum" edge.
    """
    table = format_table(T, select, query)

    def rownum_edge(count):
        # THE LONE EDGE SIMPLY NUMBERS THE ROWS 0..count
        return [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": count, "interval": 1}}]

    if len(table.data) == 0:
        # EMPTY RESULT: ZERO-LENGTH DOMAIN, EMPTY MATRICES
        return Cube(
            select,
            edges=rownum_edge(0),
            data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
        )

    # TRANSPOSE ROWS INTO ONE COLUMN PER HEADER
    cols = zip(*unwrap(table.data))
    return Cube(
        select,
        edges=rownum_edge(len(table.data)),
        data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}
    )
def _replace_locals(node, doc_path):
    """
    Second pass of $ref expansion: resolve fragment-only (local) references
    against the stack of enclosing documents in `doc_path` (innermost first).
    """
    if isinstance(node, Mapping):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif v == None:
                # None PROPERTIES ARE DROPPED FROM THE COPY
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE: EACH LEADING "." STEPS UP ONE ENCLOSING DOCUMENT
            for i, p in enumerate(frag):
                if p != ".":
                    if i>len(doc_path):
                        _Log.error("{{frag|quote}} reaches up past the root document", frag=frag)
                    new_value = pyDots.get_attr(doc_path[i-1], frag[i::])
                    break
            else:
                # ALL DOTS: THE ANCESTOR DOCUMENT ITSELF
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE: RESOLVE AGAINST THE ROOT DOCUMENT
            new_value = pyDots.get_attr(doc_path[-1], frag)
        # EXPAND ANY LOCAL REFS INSIDE THE RESOLVED VALUE TOO
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            return unwrap(set_default(output, new_value))

    elif isinstance(node, list):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node
def note(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # GUARD AGAINST PATHOLOGICALLY LONG TEMPLATES
    if len(template) > 10000:
        template = template[:10000]

    params = dict(unwrap(default_params), **more_params)

    # log_params CAPTURES template BEFORE THE NEWLINE PREFIX BELOW
    log_params = set_default({
        "template": template,
        "params": params,
        "timestamp": datetime.utcnow(),
        "machine": machine_metadata
    }, log_context, {"context": exceptions.NOTE})

    # MULTILINE MESSAGES START ON THEIR OWN LINE
    if not template.startswith("\n") and template.find("\n") > -1:
        template = "\n" + template

    if not cls.trace:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")
    else:
        # ANNOTATE WITH CALLER LOCATION AND THREAD WHEN TRACING
        log_template = "{{machine.name}} (pid{{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
        caller = sys._getframe(stack_depth + 1)
        log_params.location = {
            "line": caller.f_lineno,
            "file": caller.f_code.co_filename.split(os.sep)[-1],
            "method": caller.f_code.co_name
        }
        current_thread = _Thread.current()
        log_params.thread = {"name": current_thread.name, "id": current_thread.id}

    cls.main_log.write(log_template, log_params)
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # pausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # ALLOW Log.fatal("msg", cause) SHORTHAND
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    # error_mode GUARDS AGAINST RECURSIVE FAILURE WHILE LOGGING THE FAILURE;
    # suppress_exception ENSURES WE STILL REACH THE stderr WRITE BELOW
    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))
def add(self, val):
    """
    Insert `val`, keyed by self._keys. A duplicate key either errors
    (fail_on_dup), warns (DEBUG), or is silently ignored.
    """
    val = datawrap(val)
    key = value2key(self._keys, val)
    if key == None:
        Log.error("Expecting key to be not None")

    existing = self._data.get(key)
    if existing is None:
        # FIRST TIME THIS KEY IS SEEN
        self._data[key] = unwrap(val)
        self.count += 1
    elif existing is not val:
        if self.fail_on_dup:
            Log.error("{{new|json}} with key {{key|json}} already filled with {{old|json}}", key=key, new=val, old=self[val])
        elif DEBUG:
            Log.warning(
                "key {{key|json}} already filled\nExisting\n{{existing|json|indent}}\nValue\n{{value|json|indent}}",
                key=key,
                existing=existing,
                value=val
            )
def test_json(results, description, method, n):
    """
    Benchmark `method` over each global test case, calling it n x count
    times, and append a summary dict to `results`.

    :param results: list to receive one summary dict per case
    :param description: label for this run; "scrub" triggers pre-scrubbing
    :param method: the JSON-encoding callable under test
    :param n: outer repetition count
    """
    output = []
    for case in cases:
        try:
            data, count = globals()[case]
            if "scrub" in description:
                #SCRUB BEFORE SENDING TO C ROUTINE (NOT FAIR, BUT WE GET TO SEE HOW FAST ENCODING GOES)
                data = unwrap(scrub(data))

            # CAPTURE ONE SAMPLE RESULT FOR THE REPORT
            try:
                example = method(data)
                if case == "HUGE":
                    example = "<too big to show>"
            except Exception as e:
                Log.warning("json encoding failure", cause=e)
                example = "<CRASH>"

            t0 = time.time()
            try:
                # FIX: inner loop used `i` too, shadowing the outer counter
                for i in range(n):
                    for j in range(count):
                        output.append(method(data))
                duration = time.time() - t0
            except Exception:
                # TIME THE PARTIAL RUN ANYWAY; len(output) SHOWS PROGRESS
                duration = time.time() - t0

            summary = {
                "description": description,
                "interpreter": platform.python_implementation(),
                "time": duration,
                "type": case,
                "num": n,
                "count": count,
                "length": len(output),
                "result": example
            }
            Log.note("{{interpreter}}: {{description}} {{type}} x {{num}} x {{count}} = {{time}} result={{result}}", **summary)
            results.append(summary)
        except Exception as e:
            Log.warning("problem with encoding: {{message}}", {"message": e.message}, e)
def drill_filter(esfilter, data):
    """
    PARTIAL EVALUATE THE FILTER BASED ON DATA GIVEN

    TODO: FIX THIS MONUMENALLY BAD IDEA

    NOTE(review): the remainder of this function (which uses `esfilter` and
    the primary_* state below) is not visible in this excerpt.
    """
    esfilter = unwrap(esfilter)
    primary_nested = []  # track if nested, changes if not
    primary_column = []  # only one path allowed
    primary_branch = []  # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree

    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception, e:
                Log.error("{{name}} does not exist", name=fieldname)
            if isinstance(d, list) and len(col) > 1:
                if len(primary_column) <= depth+i:
                    # FIRST TIME AT THIS DEPTH: RECORD THE NESTED PATH
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth+i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth+i] = True
                    primary_column[depth+i] = c
                    primary_branch[depth+i] = d

                return c, join_field(col[i+1:])
            else:
                if len(primary_column) <= depth+i:
                    # SCALAR STEP: STILL TRACK THE PATH, BUT NOT NESTED
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])

        return fieldname, None
assert settings["class"] # IMPORT MODULE FOR HANDLER path = settings["class"].split(".") class_name = path[-1] path = ".".join(path[:-1]) constructor = None try: temp = __import__(path, globals(), locals(), [class_name], -1) constructor = object.__getattribute__(temp, class_name) except Exception, e: if settings.stream and not constructor: # PROVIDE A DEFAULT STREAM HANLDER constructor = TextLog_usingThreadedStream else: Log.error("Can not find class {{class}}", {"class": path}, cause=e) # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS if settings.filename: from pyLibrary.env.files import File f = File(settings.filename) if not f.parent.exists: f.parent.create() settings['class'] = None params = unwrap(settings) log_instance = constructor(**params) return log_instance
def selector(d):
    """Apply every (name, puller) pair to the wrapped row; return plain dict."""
    row = wrap(d)
    acc = Data()
    for name, puller in push_and_pull:
        acc[name] = puller(row)
    return unwrap(acc)