def process_task(mail, please_stop=None):
    """
    Execute the celery task named in `mail`, record status/result on `mail`,
    then push the serialized mail onto the response queue.
    NOTE(review): `this` appears to be a module-level worker singleton — confirm.
    """
    try:
        if not this.call:
            # lazily capture the unpatched Task.__call__ and a dummy Task instance
            this.call = this.celery.Task.__call__
            this.dummy = this.celery.Task()
        name = mail.sender.name
        args = (mail,) if mail.sender.bind else tuple()
        this._status_update(mail, states.STARTED, {"response": {"start_time": Date.now().format()}})
        fun = this.celery._tasks[name]
        mail.result = this.call(this.dummy, fun, *args, **unwrap(mail.message))
        mail.status = states.SUCCESS
    except Exception as e:
        mail.result = Except.wrap(e)
        mail.status = states.FAILURE
        # mail = wrap({"request": {"id": mail.request.id}, "sender": {"name": "mail.sender.name"}})
        Log.warning("worker failed to process {{mail}}", mail=mail, cause=e)
    # success or failure, always report back to the response queue
    mail.response.end_time = Date.now().format()
    if isinstance(mail.result, Exception):
        mail.result = Except.wrap(mail.result)
    mail.receiver.thread = None
    Log.note(
        "Add {{id}} ({{name}}) to response queue\n{{result}}",
        id=mail.request.id,
        name=mail.sender.name,
        result=mail
    )
    this.response_queue.add(value2json(mail))
    with work_list_lock:
        del work_list[mail.request.id]
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until "exit" is read from stdin or please_stop is signalled.
    """
    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        if cr_count > 30:
            # stdin looks like /dev/null; slow the polling down
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # FIX: the wrap result was discarded; `"..." in e` on a raw
            # Exception raises TypeError, so the check below never worked
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
                break
            continue  # FIX: `line` is undefined after a failed read; retry
        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null
        if line.strip() == "exit":
            Log.alert("'exit' Detected! Stopping...")
            return
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until "exit" is read from stdin or please_stop is signalled.
    On Windows, delegate to the msvcrt-based waiter instead.
    """
    try:
        import msvcrt
        _wait_for_exit_on_windows(please_stop)
        # FIX: without this return the code fell through into the stdin
        # polling loop after the windows wait finished (the newer variant
        # of this function returns here)
        return
    except Exception:
        pass  # not windows (or windows wait failed): poll stdin below
    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        # DEBUG and Log.note("inside wait-for-shutdown loop")
        if cr_count > 30:
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # FIX: wrap result was discarded; `"..." in e` on a raw
            # Exception raises TypeError, so the check below never worked
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
                break
            continue  # FIX: `line` is undefined after a failed read; retry
        # DEBUG and Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count)
        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null
        if line.strip() == "exit":
            Log.alert("'exit' Detected! Stopping...")
            return
def write(self, template, params):
    """
    Queue one log entry for asynchronous processing.

    :param template: moustache template for the log line
    :param params: values to expand into the template
    :return: self, for chaining
    """
    try:
        entry = {"template": template, "params": params}
        self.queue.add(entry)
        return self
    except Exception as cause:
        raise Except.wrap(cause)  # OH NO!
def _merge(*schemas):
    """
    Merge any number of schema mappings into one.

    If any schema is nested (contains NESTED_TYPE), the merge result is nested;
    otherwise keys are merged recursively, sorted by jx.sort.
    On type conflicts, falls back to STRING when mixing STRING/NUMBER,
    otherwise raises via Log.error.
    """
    if len(schemas) == 1:
        return schemas[0]
    try:
        if any(NESTED_TYPE in s for s in schemas):
            # IF THERE ARE ANY ARRAYS, THEN THE MERGE IS AN ARRAY
            new_schemas = []
            for schema in schemas:
                if NESTED_TYPE in schema:
                    # separate the nested portion from the remaining keys
                    sub_schema = schema[NESTED_TYPE]
                    residue = {k: v for k, v in schema.items() if k != NESTED_TYPE}
                    new_schemas.append(_merge(sub_schema, residue))
                else:
                    new_schemas.append(schema)
            return {NESTED_TYPE: _merge(*new_schemas)}
        else:
            # merge per-key, skipping schemas that lack the key (falsy s.get(k))
            return OrderedDict(
                (k, _merge(*(ss for s in schemas for ss in [s.get(k)] if ss)))
                for k in jx.sort(set(k for s in schemas for k in s.keys()))
            )
    except Exception as e:
        e = Except.wrap(e)
        if "Expecting types to match" in e:
            raise e
        t = list(set(schemas))
        if len(t) == 1:
            return t[0]
        elif len(t) == 2 and STRING in t and NUMBER in t:
            return STRING
        else:
            Log.error("Expecting types to match {{types|json}}", types=t)
def assertRaises(self, problem=None, function=None, *args, **kwargs):
    """
    Assert that calling `function(*args, **kwargs)` raises `problem`.

    :param problem: exception class, or text expected inside the exception;
        when None, any Exception is accepted
    :param function: callable under test; when None, return a RaiseContext
        for use as a context manager
    """
    if function is None:
        return RaiseContext(self, problem=problem or Exception)
    try:
        function(*args, **kwargs)
    except Exception as e:
        # FIX: issubclass() raises TypeError when `problem` is text (or any
        # non-class); guard with isinstance(problem, type) first
        if isinstance(problem, type) and issubclass(problem, BaseException) and isinstance(e, problem):
            return
        f = Except.wrap(e)
        if is_text(problem):
            if problem in f:
                return
            Log.error(
                "expecting an exception returning {{problem|quote}} got something else instead",
                problem=problem,
                cause=f
            )
        elif not isinstance(f, problem) and not isinstance(e, problem):
            Log.error(
                "expecting an exception of type {{type}} to be raised",
                type=problem
            )
        else:
            return
    Log.error("Expecting an exception to be raised")
def query(self, _query):
    """
    Run a jx query against this Elasticsearch container, dispatching to the
    deep/aggs/set operators; nested "from" queries are run first and fed into
    the outer query.
    """
    try:
        query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
        for s in listwrap(query.select):
            if s.aggregate != None and not aggregates.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate
                )
        frum = query["from"]
        if isinstance(frum, QueryOp):
            # inner query: run it, then point the outer query at the result
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)
        if is_deepop(self.es, query):
            return es_deepop(self.es, query)
        if is_aggsop(self.es, query):
            return es_aggsop(self.es, frum, query)
        if is_setop(self.es, query):
            return es_setop(self.es, query)
        Log.error("Can not handle")
    except Exception as e:
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            # ES circuit breaker tripped; clear the cache and report
            http.post(self.es.cluster.url / "_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def _db_insert_column(self, column):
    """
    Insert one column record into the metadata table; on a uniqueness
    violation, requeue the column as an UPDATE instead.
    NOTE(review): "INSERT INTO" + db_table_name relies on the SQL-object
    concatenation supplying spacing — confirm against the sql helpers.
    """
    try:
        self.db.execute(
            "INSERT INTO" + db_table_name + sql_iso(all_columns) + "VALUES" + sql_iso(
                sql_list(
                    [
                        # nested_path/partitions are stored as JSON text
                        quote_value(column[c.name])
                        if c.name not in ("nested_path", "partitions")
                        else quote_value(value2json(column[c.name]))
                        for c in METADATA_COLUMNS
                    ]
                )
            )
        )
    except Exception as e:
        e = Except.wrap(e)
        if "UNIQUE constraint failed" in e or " are not unique" in e:
            # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
            self.todo.add((UPDATE, column), force=True)
        else:
            Log.error("do not know how to handle", cause=e)
def get_branches(hg, branches, kwargs=None):
    """
    Return a UniqueIndex of known branches, preferring the ES cache and
    falling back to hg when the cache is stale or the index is missing.
    """
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data
        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()
        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            # first run: create the index, then retry
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            # NOTE(review): single-Mapping recursive call — presumably this
            # function is @override-decorated so the dict unpacks as kwargs;
            # verify the decorator in the full file
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    json_string = text(json_string)
    if not is_text(json_string) and json_string.__class__.__name__ != "FileString":
        Log.error("only unicode json accepted")
    try:
        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)
        if flexible:
            value = hjson2value(json_string)
        else:
            value = to_data(json_decoder(text(json_string)))
        if leaves:
            value = leaves_to_data(value)
        return value
    except Exception as e:
        e = Except.wrap(e)
        if not json_string.strip():
            Log.error("JSON string is only whitespace")
        # walk to the deepest delimiter-error cause for a precise pointer
        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause
        if "Expecting '" in c and "' delimiter: line" in c:
            # build a caret pointer at the reported line/column
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"
            if len(sample) > 43:
                sample = sample[:43] + "..."
            Log.error(CAN_NOT_DECODE_JSON + " at:\n\t{{sample}}\n\t{{pointer}}\n", sample=sample, pointer=pointer)
        # no delimiter detail: dump a hex view of the first 1000 chars
        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            # NOTE(review): iterating bytes yields ints on Python 3, so
            # ord(c)/c.decode would fail there; the fallback below masks it —
            # confirm intended runtime
            char_str = " " + " ".join((c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception:
            char_str = " "
        Log.error(CAN_NOT_DECODE_JSON + ":\n{{char_str}}\n{{hexx_str}}\n", char_str=char_str, hexx_str=hexx_str, cause=e)
def _stop_main_thread():
    """Request the main thread to stop, then terminate the process."""
    try:
        MAIN_THREAD.stop()
    except Exception as cause:
        Log.warning("Problem with threads", cause=Except.wrap(cause))
    sys.exit(0)
def get_raw_json(path):
    """
    Flask endpoint: query container `path` using the request's query args as
    an `eq` filter; respond with the matching records as pretty JSON.
    """
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = scrub_args(flask.request.args)
            limit = args.limit if args.limit else 10
            args.limit = None  # limit is passed to jx.run, not used as a filter
            frum = find_container(path, after=None)
            result = jx.run(
                {
                    "from": path,
                    "where": {"eq": args},
                    "limit": limit,
                    "format": "list"
                },
                frum
            )
            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")
        # timer has exited, so duration is now final
        result.meta.active_data_response_time = active_data_timer.duration
        response_data = value2json(result.data, pretty=True).encode('utf8')
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception as e:
        e = Except.wrap(e)
        return send_error(active_data_timer, body, e)
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj: THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        # FIX: pass the scrubber function itself; previously the else-branch
        # CALLED _trim_whitespace() and passed its return value, inconsistent
        # with the _keep_whitespace branch
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else _trim_whitespace)
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            # fallback encoder before giving up
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text_type(repr(obj)), cause=e)
def track_setup(
    instance_setup_function,
    request,
    instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
    utility,   # THE utility OBJECT FOUND IN CONFIG
    please_stop
):
    """
    Run the setup function for one spot instance, tagging the instance and
    tracking failed attempts per spot request.
    NOTE(review): references `self`, failed_locker, failed_attempts but is not
    a method — presumably a closure inside a class method in the full file;
    confirm.
    """
    try:
        instance_setup_function(instance, utility, please_stop)
        instance.add_tag("Name", self.settings.ec2.instance.name + " (running)")
        with self.net_new_locker:
            self.net_new_spot_requests.remove(request.id)
    except Exception as e:
        e = Except.wrap(e)
        instance.add_tag("Name", "")  # clear the name so the machine is not mistaken as ready
        with failed_locker:
            failed_attempts[request.id] += [e]
        if "Can not setup unknown " in e:
            Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e)
        elif ERROR_ON_CALL_TO_SETUP in e:
            with failed_locker:
                causes = failed_attempts[request.id]
            if len(causes) > 2:
                # only complain after repeated setup() failures
                Log.warning("Problem with setup() of {{instance_id}}", instance_id=instance.id, cause=causes)
        else:
            Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e)
def setUpClass(self):
    """
    Wait for the ES backend to answer, then delete stale testing_* indexes.
    NOTE(review): unittest setUpClass is normally a classmethod; the parameter
    is named `self` here — confirm decoration at the class definition.
    """
    while True:
        try:
            es = test_jx.global_settings.backend_es
            http.get_json(URL(es.host, port=es.port))
            break
        except Exception as e:
            e = Except.wrap(e)
            if "No connection could be made because the target machine actively refused it" in e or "Connection refused" in e:
                Log.alert("Problem connecting")
            else:
                Log.error("Server raised exception", e)
    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
    aliases = cluster.get_aliases()
    for a in aliases:
        try:
            if a.index.startswith("testing_"):
                # index names end with a creation timestamp
                create_time = Date(
                    a.index[-15:],
                    "%Y%m%d_%H%M%S"
                )  # EXAMPLE testing_0ef53e45b320160118_180420
                if create_time < Date.now() - 10 * MINUTE:
                    cluster.delete_index(a.index)
        except Exception as e:
            Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)
def wrapper(*args, **kwargs):
    """
    Decorator body: repack positional and keyword arguments (plus any literal
    "kwargs" Mapping) into one canonical kwargs dict before calling func.
    NOTE(review): func, params, defaults, params_pack, dict_zip come from the
    enclosing decorator scope.
    """
    try:
        func_name = get_function_name(func)
        # constructors: arg[0] is self/cls and must not be packed
        if func_name in ("__init__", "__new__") and "kwargs" in kwargs:
            packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
            return func(args[0], **packed)
        elif func_name in ("__init__", "__new__") and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
            # ASSUME SECOND UNNAMED PARAM IS kwargs
            packed = params_pack(params, args[1], defaults)
            return func(args[0], **packed)
        elif func_name in ("__init__", "__new__"):
            # DO NOT INCLUDE self IN kwargs
            packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
            return func(args[0], **packed)
        # instance methods: same treatment keyed off the "self" parameter name
        elif params[0] == "self" and "kwargs" in kwargs:
            packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
            return func(args[0], **packed)
        elif params[0] == "self" and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
            # ASSUME SECOND UNNAMED PARAM IS kwargs
            packed = params_pack(params, args[1], defaults)
            return func(args[0], **packed)
        elif params[0] == "self":
            packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
            return func(args[0], **packed)
        # plain functions
        elif len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
            # ASSUME SINGLE PARAMETER IS A SETTING
            packed = params_pack(params, args[0], defaults)
            return func(**packed)
        elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
            # PUT args INTO kwargs
            packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
            return func(**packed)
        else:
            # PULL kwargs OUT INTO PARAMS
            packed = params_pack(params, kwargs, dict_zip(params, args), defaults)
            return func(**packed)
    except TypeError as e:
        e = Except.wrap(e)
        if e.message.startswith(func_name) and "takes at least" in e:
            # report which declared parameters never made it into `packed`
            missing = [p for p in params if str(p) not in packed]
            get_logger().error(
                "Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
                func_name=func_name,
                missing=missing,
                given=packed.keys(),
                stack_depth=1
            )
        get_logger().error("Error dispatching call", e)
def value2json(value):
    """Serialize `value` to JSON text, scrubbing it to primitives first."""
    try:
        cleaned = scrub(value, scrub_number=float)
        return text(_json_encoder(cleaned))
    except Exception as cause:
        cause = Except.wrap(cause)
        Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=cause)
        raise cause
def extend(self, rows):
    """
    Append `rows` to the current BigQuery shard; create a new shard when the
    batch introduces new (nested) columns or the insert reports "stopped".
    Oversized payloads are retried as two half-batches.
    """
    if self.read_only:
        Log.error("not for writing")
    try:
        update = {}
        with Timer("encoding"):
            # for/else: a nested-column discovery breaks and re-encodes all
            # rows so "RECORD" gets mapped to "REPEATED"; a clean pass exits
            while True:
                output = []
                for rownum, row in enumerate(rows):
                    typed, more, add_nested = typed_encode(row, self.flake)
                    update.update(more)
                    if add_nested:
                        # row HAS NEW NESTED COLUMN!
                        # GO OVER THE rows AGAIN SO "RECORD" GET MAPPED TO "REPEATED"
                        break
                    output.append(typed)
                else:
                    break
        if update or not self.shard:
            # BATCH HAS ADDITIONAL COLUMNS!!
            # WE CAN NOT USE THE EXISTING SHARD, MAKE A NEW ONE:
            self._create_new_shard()
            Log.note("added new shard with name: {{shard}}", shard=self.shard.table_id)
        with Timer("insert {{num}} rows to bq", param={"num": len(rows)}):
            failures = self.container.client.insert_rows_json(
                self.shard,
                json_rows=output,
                row_ids=[None] * len(output),
                skip_invalid_rows=False,
                ignore_unknown_values=False,
            )
        if failures:
            if all(r == "stopped" for r in wrap(failures).errors.reason):
                self._create_new_shard()
                Log.note(
                    "STOPPED encountered: Added new shard with name: {{shard}}",
                    shard=self.shard.table_id,
                )
            Log.error(
                "Got {{num}} failures:\n{{failures|json}}",
                num=len(failures),
                failures=failures[:5],
            )
        else:
            self.last_extend = Date.now()
            Log.note("{{num}} rows added", num=len(output))
    except Exception as e:
        e = Except.wrap(e)
        if len(rows) > 1 and "Request payload size exceeds the limit" in e:
            # TRY A SMALLER BATCH
            cut = len(rows) // 2
            self.extend(rows[:cut])
            self.extend(rows[cut:])
            return
        Log.error("Do not know how to handle", cause=e)
def relay_post(path):
    """Forward the incoming POST through the cache; reply 400 with the wrapped error as JSON on failure."""
    try:
        return cache.request("post", path, flask.request.headers)
    except Exception as cause:
        cause = Except.wrap(cause)
        Log.warning("could not handle request", cause=cause)
        body = unicode2utf8(value2json(cause, pretty=True))
        return Response(body, status=400, headers={"Content-Type": "text/html"})
def jitter_MWU(values, start, mid, end): """ RETURN A BETTER MIDPOINT< ACCOUNTING FOR t-test RESULTS """ # ADD SOME CONSTRAINTS TO THE RANGE OF VALUES TESTED m_start = min(mid, max(start + MIN_POINTS, mid - JITTER)) m_end = max(mid, min(mid + JITTER, end - MIN_POINTS)) if m_start == m_end: return no_good_edge, no_good_edge, mid mids = np.array(range(m_start, m_end)) # MWU SCORES try: m_score = np.array([ stats.mannwhitneyu( values[max(start, m - MAX_POINTS):m], values[m:min(end, m + MAX_POINTS)], use_continuity=True, alternative="two-sided", ) for m in mids ]) t_score = np.array([ stats.ttest_ind( values[max(start, m - MAX_POINTS):m], values[m:min(end, m + MAX_POINTS)], equal_var=False, ) for m in mids ]) except Exception as e: e = Except.wrap(e) if "All numbers are identical" in e: return no_good_edge, no_good_edge, mids[0] raise e # TOTAL SUM-OF-SQUARES # DO NOT KNOW WHAT THIS WAS DOING # if m_start - start == 0: # # WE CAN NOT OFFSET BY ONE, SO WE ADD A DUMMY VALUE # v_prefix = np.array([np.nan] + list(not_right(cumSS(values[start:m_end]), 1))) # else: # # OFFSET BY ONE, WE WANT cumSS OF ALL **PREVIOUS** VALUES # v_prefix = not_right( # not_left(cumSS(values[start:m_end]), m_start - start - 1), 1 # ) # v_suffix = not_right(cumSS(values[m_start:end][::-1])[::-1], end - m_end) # v_score = v_prefix + v_suffix # pvalue = np.sqrt(m_score[:, 1] * v_score) # GOEMEAN OF SCORES # PICK LOWEST pvalue = np.sqrt(m_score[:, 1] * t_score[:, 1]) best = np.argmin(pvalue) return Data(pvalue=m_score[best, 1]), Data(pvalue=t_score[best, 1]), mids[best]
def _db_transaction(self): self.db.execute(str("BEGIN")) try: yield self.db.execute(str("COMMIT")) except Exception as e: e = Except.wrap(e) self.db.execute(str("ROLLBACK")) Log.error("Transaction failed", cause=e)
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until "exit" arrives on STDIN or please_stop is signalled; on
    Windows, delegate to the msvcrt-based waiter.
    """
    try:
        import msvcrt
        _wait_for_exit_on_windows(please_stop)
        return
    except Exception:
        pass  # not windows: poll STDIN below
    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    try:
        while not please_stop:
            if cr_count > 30:
                (Till(seconds=3) | please_stop).wait()
            try:
                # line = ""
                line = STDIN.readline()
            except Exception as e:
                # FIX: the wrap result was discarded; `"..." in e` on a raw
                # Exception raises TypeError, so this branch never matched
                e = Except.wrap(e)
                if "Bad file descriptor" in e:
                    Log.note("can not read from stdin")
                    _wait_for_interrupt(please_stop)
                    break
            if not line:
                cr_count += 1
            else:
                cr_count = -1000000  # NOT /dev/null
            if line.strip() == b"exit":
                Log.alert("'exit' Detected! Stopping...")
                return
    except Exception as e:
        Log.warning("programming error", cause=e)
    finally:
        if please_stop:
            Log.note("please_stop has been requested")
        Log.note("done waiting for exit")
def post_till_response(self, *args, **kwargs):
    """
    POST to the test server, retrying forever while the connection is
    refused; any other failure is escalated via Log.error.
    """
    while True:
        try:
            return self.server.post(*args, **kwargs)
        except Exception as cause:
            cause = Except.wrap(cause)
            if "No connection could be made because the target machine actively refused it" in cause:
                Log.alert("Problem connecting, retrying")
            else:
                Log.error("Server raised exception", cause)
def test_dash_in_tablename(self):
    """
    parse() must reject a dashed table name; the error is expected to mention
    the clauses the parser tried (group by, order by, having, limit, where).
    """
    try:
        result = parse(
            "select * from coverage-summary.source.file.covered limit 20"
        )
        self.assertTrue(False, "expecting to fail")
    except Exception as e:
        e = Except.wrap(e)
        self.assertTrue(
            all(v in str(e) for v in ["group by", "order by", "having", "limit", "where"]),
            "expecting mention of other expected clauses"
        )
def delete(self):
    """
    Remove the file or directory at self._filename.

    :return: self on success (including when the path does not exist);
        None when Windows reports the path as missing mid-delete
    """
    target = self._filename
    try:
        if os.path.isdir(target):
            shutil.rmtree(target)
        elif os.path.isfile(target):
            os.remove(target)
        return self
    except Exception as cause:
        cause = Except.wrap(cause)
        if u"The system cannot find the path specified" in cause:
            return
        Log.error(u"Could not remove file", cause)
def try_till_response(self, *args, **kwargs):
    """
    GET from the test server, retrying (with a pause) while the connection
    is refused; any other failure is escalated via Log.error.
    """
    while True:
        try:
            return self.server.get(*args, **kwargs)
        except Exception as cause:
            cause = Except.wrap(cause)
            refused = (
                "No connection could be made because the target machine actively refused it" in cause
                or "Connection refused" in cause
            )
            if refused:
                Log.alert("Problem connecting")
                Till(seconds=WAIT_AFTER_PROBLEM).wait()
            else:
                Log.error("Server raised exception", cause)
def query(self, sql, param=None, stream=False, row_tuples=False):
    """
    RETURN A LIST OF dicts

    :param sql: SQL TEMPLATE TO SEND
    :param param: PARAMETERS TO INJECT INTO SQL TEMPLATE
    :param stream: STREAM OUTPUT
    :param row_tuples: DO NOT RETURN dicts
    """
    if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
        Log.error("must perform all queries inside a transaction")
    self._execute_backlog()
    try:
        if isinstance(sql, SQL):
            sql = text(sql)
        if param:
            sql = expand_template(sql, quote_param(param))
        sql = self.preamble + outdent(sql)
        self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
        self.cursor.execute(sql)
        if row_tuples:
            if stream:
                result = self.cursor
            else:
                result = wrap(list(self.cursor))
        else:
            # map each row tuple to a Data keyed by the cursor's column names
            columns = tuple(
                utf8_to_unicode(d[0])
                for d in coalesce(self.cursor.description, [])
            )

            def streamer():
                # lazy: rows are pulled from the cursor as consumed
                for row in self.cursor:
                    output = Data()
                    for c, v in zip(columns, row):
                        output[c] = v
                    yield output

            if stream:
                result = streamer()
            else:
                result = wrap(streamer())
        return result
    except Exception as e:
        e = Except.wrap(e)
        if "InterfaceError" in e:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def delete(self):
    """
    Remove this file or directory from disk.

    :return: self on success (a missing path counts as success);
        None when Windows reports the path as missing mid-delete
    """
    path = self._filename
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)
        elif os.path.isfile(path):
            os.remove(path)
        return self
    except Exception as cause:
        cause = Except.wrap(cause)
        if "The system cannot find the path specified" in cause:
            return
        Log.error("Could not remove file", cause)
def delete_daemon(file, caller_stack, please_stop):
    # WINDOWS WILL HANG ONTO A FILE FOR A BIT AFTER WE CLOSED IT
    # keep retrying every 10 seconds until the delete succeeds or we are stopped
    while not please_stop:
        try:
            file.delete()
            return
        except Exception as cause:
            cause = Except.wrap(cause)
            # keep only the top of the local trace, then append the caller's
            cause.trace = cause.trace[0:2] + caller_stack
            Log.warning(u"problem deleting file {{file}}", file=file.abspath, cause=cause)
            (Till(seconds=10) | please_stop).wait()
def test_trace_of_simple_raises(self):
    """Wrapping a raised exception must capture this test method in the trace at depth 2."""
    try:
        problem_a()
    except Exception as e:
        f = Except.wrap(e)
        self.assertEqual(f.template, "Exception: expected exception")
        for i, m in enumerate(listwrap(f.trace).method):
            if m == "test_trace_of_simple_raises":
                self.assertEqual(i, 2)
                break
        else:
            self.fail("expecting stack to show this method")
def test_full_trace_on_wrap(self):
    """Wrapping must preserve the full stack; this test method appears at depth 1."""
    try:
        problem_b()
    except Exception as e:
        cause = Except.wrap(e)
        self.assertEqual(cause.template, "Exception: expected exception")
        for i, m in enumerate(listwrap(cause.trace).method):
            if m == "test_full_trace_on_wrap":
                self.assertEqual(i, 1)
                break
        else:
            self.fail("expecting stack to show this method")
def relay_post(path):
    """
    Forward the incoming POST through the cache; on any failure, reply 400
    with the wrapped error serialized as pretty JSON.
    """
    try:
        return cache.request("post", path, flask.request.headers)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("could not handle request", cause=e)
        return Response(
            unicode2utf8(value2json(e, pretty=True)),
            status=400,
            headers={"Content-Type": "text/html"}
        )
def delete_daemon(file, caller_stack, please_stop):
    # WINDOWS WILL HANG ONTO A FILE FOR A BIT AFTER WE CLOSED IT
    # retry in a 10-second loop until deletion works or please_stop fires
    while not please_stop:
        try:
            file.delete()
            return
        except Exception as problem:
            problem = Except.wrap(problem)
            # trim the local trace and splice in the caller's stack for context
            problem.trace = problem.trace[0:2] + caller_stack
            Log.warning(u"problem deleting file {{file}}", file=file.abspath, cause=problem)
            (Till(seconds=10) | please_stop).wait()
def ujson_encode(value, pretty=False):
    """
    Encode `value` to JSON text with ujson; pretty output is delegated to
    pretty_json.
    """
    if pretty:
        return pretty_json(value)
    try:
        scrubbed = scrub(value)
        # NOTE(review): .decode('utf8') implies ujson_dumps returns bytes here;
        # on Python 3 ujson returns str, where this would fail — confirm the
        # target runtime
        return ujson_dumps(scrubbed, ensure_ascii=False, sort_keys=True, escape_forward_slashes=False).decode('utf8')
    except Exception as e:
        # local imports avoid a circular dependency with the logging module
        from mo_logs.exceptions import Except
        from mo_logs import Log
        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
def encode(self, value, pretty=False):
    """
    Encode `value` to JSON text with this instance's encoder; pretty output
    is delegated to pretty_json.
    NOTE(review): uses unicode() — Python 2 code.
    """
    if pretty:
        return pretty_json(value)
    try:
        scrubbed = scrub(value)
        return unicode(self.encoder.encode(scrubbed))
    except Exception as e:
        # local imports avoid a circular dependency with the logging module
        from mo_logs.exceptions import Except
        from mo_logs import Log
        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=_repr(value), cause=e)
        raise e
def raise_error(e, packed):
    """
    Translate a TypeError from calling func into a helpful "missing parameter"
    log message, listing declared parameters absent from `packed`.
    NOTE(review): func_name, params, get_logger come from the enclosing
    decorator scope.
    """
    err = text_type(e)
    e = Except.wrap(e)
    if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
        missing = [p for p in params if str(p) not in packed]
        given = [p for p in params if str(p) in packed]
        get_logger().error(
            "Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
            func_name=func_name,
            missing=missing,
            given=given,
            stack_depth=2
        )
    get_logger().error("Error dispatching call", e)
def value2json(obj, pretty=False, sort_keys=False):
    """
    Encode `obj` as JSON text; falls back to the pypy encoder before giving up.

    :param obj: THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    """
    try:
        output = json_encoder(obj, pretty=pretty)
        if output == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return output
    except Exception as cause:
        cause = Except.wrap(cause)
        try:
            return pypy_json_encode(obj)
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=repr(obj), cause=cause)
def encode(self, value, pretty=False):
    """
    Encode `value` to JSON text, timing the scrub and encode phases; pretty
    output is delegated to pretty_json.
    """
    if pretty:
        return pretty_json(value)
    try:
        with Timer("scrub", too_long=0.1):
            cleaned = scrub(value)
        with Timer("encode", too_long=0.1):
            return text_type(self.encoder(cleaned))
    except Exception as cause:
        # local imports avoid a circular dependency with the logging module
        from mo_logs.exceptions import Except
        from mo_logs import Log
        wrapped = Except.wrap(cause)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=wrapped)
        raise wrapped
def _run(self):
    """
    Thread entry point: run self.target, record the outcome on
    self.end_of_thread, then stop and join all child threads and signal
    self.stopped.
    """
    self.id = get_ident()
    with RegisterThread(self):
        try:
            if self.target is not None:
                # release references to args early; they may be large
                a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
                self.end_of_thread.response = self.target(*a, **k)
                self.parent.remove_child(self)  # IF THREAD ENDS OK, THEN FORGET ABOUT IT
        except Exception as e:
            e = Except.wrap(e)
            with self.synch_lock:
                self.end_of_thread.exception = e
            with self.parent.child_lock:
                emit_problem = self not in self.parent.children
            if emit_problem:
                # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                try:
                    Log.fatal("Problem in thread {{name|quote}}", name=self.name, cause=e)
                except Exception:
                    sys.stderr.write(str("ERROR in thread: " + self.name + " " + text_type(e) + "\n"))
        finally:
            try:
                # stop, then join, every child thread before declaring done
                with self.child_lock:
                    children = copy(self.children)
                for c in children:
                    try:
                        DEBUG and sys.stdout.write(str("Stopping thread " + c.name + "\n"))
                        c.stop()
                    except Exception as e:
                        Log.warning("Problem stopping thread {{thread}}", thread=c.name, cause=e)
                for c in children:
                    try:
                        DEBUG and sys.stdout.write(str("Joining on thread " + c.name + "\n"))
                        c.join()
                    except Exception as e:
                        Log.warning("Problem joining thread {{thread}}", thread=c.name, cause=e)
                    finally:
                        DEBUG and sys.stdout.write(str("Joined on thread " + c.name + "\n"))
                del self.target, self.args, self.kwargs
                DEBUG and Log.note("thread {{name|quote}} stopping", name=self.name)
            except Exception as e:
                DEBUG and Log.warning("problem with thread {{name|quote}}", cause=e, name=self.name)
            finally:
                self.stopped.go()
                DEBUG and Log.note("thread {{name|quote}} is done", name=self.name)
def get_file(ref, url):
    """
    Read the config file referenced by `ref`, resolving "~" and relative
    paths against `url`, parsing as (flexible) JSON with INI fallback, and
    expanding any nested references.
    """
    if ref.path.startswith("~"):
        # expand home directory, normalizing windows separators
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]
        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            # count leading dots to climb parent directories
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1
            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path
    # windows wants backslashes, and no leading slash
    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")
    try:
        if DEBUG:
            Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        # FIX: the template was the corrupted "{(unknown)}"; use the moustache
        # placeholder matching the `filename` kwarg
        Log.error("Could not read file {{filename}}", filename=path, cause=e)
    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
def assertRaises(self, problem, function, *args, **kwargs):
    """
    Assert that calling `function(*args, **kwargs)` raises `problem`.

    :param problem: text expected inside the exception, or an exception
        class/tuple to match with isinstance
    :param function: callable under test
    """
    try:
        function(*args, **kwargs)
    except Exception as e:
        wrapped = Except.wrap(e)
        if is_text(problem):
            if problem in wrapped:
                return
            Log.error(
                "expecting an exception returning {{problem|quote}} got something else instead",
                problem=problem,
                cause=wrapped
            )
        elif not isinstance(wrapped, problem) and not isinstance(e, problem):
            Log.error("expecting an exception of type {{type}} to be raised", type=problem)
        else:
            return
    Log.error("Expecting an exception to be raised")
def encode(self, value, pretty=False):
    """
    Encode `value` to JSON text, timing the scrub and encode phases and
    reporting the output size; pretty output is delegated to pretty_json.
    """
    if pretty:
        return pretty_json(value)
    try:
        with Timer("scrub", too_long=0.1):
            scrubbed = scrub(value)
        # param is mutated inside the with-block so the Timer reports the size
        param = {"size": 0}
        with Timer("encode {{size}} characters", param=param, too_long=0.1):
            output = text_type(self.encoder(scrubbed))
            param["size"] = len(output)
        return output
    except Exception as e:
        # local imports avoid a circular dependency with the logging module
        from mo_logs.exceptions import Except
        from mo_logs import Log
        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
def replacer(found):
    """
    Expand one {{path|op|op...}} template match against the enclosing `seq`
    scope stack.
    NOTE(review): closure — seq, template, FORMATTERS, _Log come from the
    enclosing function.
    """
    ops = found.group(1).split("|")
    path = ops[0]
    var = path.lstrip(".")
    # leading dots select how far up the scope stack to look
    depth = min(len(seq), max(1, len(path) - len(var)))
    try:
        val = seq[-depth]
        if var:
            if is_sequence(val) and float(var) == _round(float(var), 0):
                # numeric name: index into the sequence
                val = val[int(var)]
            else:
                val = val[var]
        for func_name in ops[1:]:
            parts = func_name.split('(')
            if len(parts) > 1:
                # HACK: formatter-with-arguments is applied via eval();
                # unsafe if templates can come from untrusted input
                val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
            else:
                val = FORMATTERS[func_name](val)
        val = toString(val)
        return val
    except Exception as e:
        from mo_logs import Except
        e = Except.wrap(e)
        try:
            if e.message.find("is not JSON serializable"):
                # WORK HARDER
                val = toString(val)
                return val
        except Exception as f:
            if not _Log:
                _late_import()
            _Log.warning(
                "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                template_=template,
                cause=e
            )
        return "[template expansion error: (" + str(e.message) + ")]"
def write_lines(self, key, lines):
    """
    Gzip the given lines into the bucket at key + ".json.gz", retrying the
    upload up to 3 times; 'Access Denied' and disk-full errors abort early.
    """
    self._verify_key_format(key)
    storage = self.bucket.new_key(key + ".json.gz")
    buff = TemporaryFile()
    archive = gzip.GzipFile(fileobj=buff, mode='w')
    count = 0
    for l in lines:
        if hasattr(l, "__iter__"):
            # NOTE(review): on Python 3 str has __iter__, which would write one
            # character per line; presumably lines are py2 str or nested lists
            # of text — confirm
            for ll in l:
                archive.write(ll.encode("utf8"))
                archive.write(b"\n")
                count += 1
        else:
            archive.write(l.encode("utf8"))
            archive.write(b"\n")
            count += 1
    archive.close()
    file_length = buff.tell()
    retry = 3
    while retry:
        try:
            with Timer(
                "Sending {{count}} lines in {{file_length|comma}} bytes",
                {"file_length": file_length, "count": count},
                silent=not self.settings.debug
            ):
                buff.seek(0)
                storage.set_contents_from_file(buff)
            break
        except Exception as e:
            e = Except.wrap(e)
            retry -= 1
            if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                Log.error("could not push data to s3", cause=e)
            else:
                Log.warning("could not push data to s3", cause=e)
    if self.settings.public:
        storage.set_acl('public-read')
    return
def _scrub(value, is_done):
    """
    Recursively convert `value` into plain JSON-serializable primitives.

    NOTE: the order of these checks is load-bearing — exact `type_ is`
    tests run before the broader isinstance/hasattr fallbacks.

    :param value: anything; project types (Data, Date, Duration, ...) included
    :param is_done: set of id()s of mappings already being scrubbed,
        used to detect cycles
    :return: None, bool, number, unicode, dict, or list
    """
    type_ = value.__class__

    if type_ in (NoneType, NullType):
        return None
    elif type_ is unicode:
        # empty/whitespace-only strings scrub to None
        value_ = value.strip()
        if value_:
            return value_
        else:
            return None
    elif type_ is float:
        # NaN/inf are not valid JSON
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    elif type_ in (int, long, bool):
        return value
    elif type_ in (date, datetime):
        return float(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return float(value.unix)
    elif type_ is Duration:
        return float(value.seconds)
    elif type_ is str:
        return utf82unicode(value)
    elif type_ is Decimal:
        return float(value)
    elif type_ is Data:
        # unwrap and scrub the underlying dict
        return _scrub(_get(value, '_dict'), is_done)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)
        output = {}
        for k, v in value.iteritems():
            if isinstance(k, basestring):
                pass
            elif hasattr(k, "__unicode__"):
                k = unicode(k)
            else:
                Log.error("keys must be strings")
            v = _scrub(v, is_done)
            # drop scrubbed-to-None values, but keep empty mappings
            if v != None or isinstance(v, Mapping):
                output[k] = v
        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, FlatList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":
        # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!)
        if value == False:
            return False
        else:
            return True
    elif not isinstance(value, Except) and isinstance(value, Exception):
        return _scrub(Except.wrap(value), is_done)
    elif hasattr(value, '__data__'):
        try:
            return _scrub(value.__data__(), is_done)
        except Exception as e:
            Log.error("problem with calling __json__()", e)
    elif hasattr(value, 'co_code') or hasattr(value, "f_locals"):
        # code objects and frames are not serializable
        return None
    elif hasattr(value, '__iter__'):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif hasattr(value, '__call__'):
        return repr(value)
    else:
        # last resort: wrap the object so its attributes can be scrubbed
        return _scrub(DataObject(value), is_done)
def life_cycle_watcher(please_stop):
    """
    Background loop: set up newly-running spot instances, and retire
    spot requests that will never be fulfilled.

    NOTE(review): control flow below deliberately uses raised-then-matched
    Log.error messages ("Can not setup unknown ", ERROR_ON_CALL_TO_SETUP)
    to classify failures in the outer except — do not reorder.

    :param please_stop: signal; loop exits when set
    """
    failed_attempts = Data()

    while not please_stop:
        spot_requests = self._get_managed_spot_requests()
        last_get = Date.now()
        instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})

        # INSTANCES THAT REQUIRE SETUP
        time_to_stop_trying = {}
        please_setup = [
            (i, r)
            for i, r in [(instances[r.instance_id], r) for r in spot_requests]
            if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
        ]
        for i, r in please_setup:
            try:
                p = self.settings.utility[i.instance_type]
                if p == None:
                    # unknown instance type: terminate, then raise (finally
                    # runs Log.error, whose message is matched below)
                    try:
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    finally:
                        Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)
                i.markup = p
                try:
                    self.instance_manager.setup(i, coalesce(p, 0))
                except Exception as e:
                    e = Except.wrap(e)
                    # remember each failed attempt per spot request
                    failed_attempts[r.id] += [e]
                    Log.error(ERROR_ON_CALL_TO_SETUP, e)
                i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(r.id)
            except Exception as e:
                if not time_to_stop_trying.get(i.id):
                    time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                if Date.now() > time_to_stop_trying[i.id]:
                    # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                    self.ec2_conn.terminate_instances(instance_ids=[i.id])
                    with self.net_new_locker:
                        self.net_new_spot_requests.remove(r.id)
                    Log.warning("Problem with setup of {{instance_id}}. Time is up. Instance TERMINATED!", instance_id=i.id, cause=e)
                elif "Can not setup unknown " in e:
                    Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                elif ERROR_ON_CALL_TO_SETUP in e:
                    # only warn after repeated setup() failures
                    if len(failed_attempts[r.id]) > 2:
                        Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                else:
                    Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

        if Date.now() - last_get > 5 * SECOND:
            # REFRESH STALE
            spot_requests = self._get_managed_spot_requests()
            last_get = Date.now()

        pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
        give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
        ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

        if self.done_spot_requests:
            with self.net_new_locker:
                expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                for ii in list(self.net_new_spot_requests):
                    if Date(ii.create_time) < expired:
                        ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                        self.net_new_spot_requests.remove(ii)
                for g in give_up:
                    self.net_new_spot_requests.remove(g.id)
                for g in ignore:
                    self.net_new_spot_requests.remove(g.id)
                pending = UniqueIndex(("id",), data=pending)
                pending = pending | self.net_new_spot_requests

            if give_up:
                self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

        if not pending and not time_to_stop_trying and self.done_spot_requests:
            Log.note("No more pending spot requests")
            please_stop.go()
            break
        elif pending:
            Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

        (Till(seconds=10) | please_stop).wait()

    Log.note("life cycle watcher has stopped")
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON (must be unicode, not bytes)
    :param params: STANDARD JSON PARAMS (expanded into the string first)
    :param flexible: REMOVE COMMENTS AND TRAILING COMMAS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    :raises Exception: via Log.error with a diagnostic pointing at the
        failing character when the JSON can not be decoded
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception as e:
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        # walk down the cause chain to the innermost "Expecting ... delimiter" error
        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            # extract line/column from the decoder's message and show a
            # caret pointing at the offending character
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        # fallback: dump a hex view of the first 1000 characters
        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            # NOTE(review): assumes py2 iteration over bytes yields 1-char
            # strings (ord/decode would fail on py3 ints) — confirm runtime
            char_str = " " + " ".join((c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception as e:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
def tuid_endpoint(path):
    """
    Flask endpoint: answer a TUID query over the `files` table.

    :param path: URL path suffix (unused by the body shown here)
    :return: flask Response — 400 on bad/oversized/failed requests,
        200 with JSON results when complete, 202 when results are partial
    """
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)

            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                # NOTE(review): oversized requests are counted as "passed" —
                # confirm that is the intended stats semantics
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )

            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            # pull branch, revision, and path list out of the three clauses
            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:  # CHECK IF service IS VERY BUSY
                # TODO: BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

                if not completed:
                    Log.note(
                        "Request for {{num}} files is incomplete for revision {{rev}}.",
                        num=len(paths), rev=rev
                    )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )

            # 202 signals the caller to retry for the remaining results
            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={
                    "Content-Type": "application/json"
                }
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={
                    "Content-Type": "text/html"
                }
            )
def unicode2Date(value, format=None):
    """
    CONVERT UNICODE STRING TO UNIX TIMESTAMP VALUE

    :param value: date/time text, or a keyword ("now", "today", ...), or a
        relative time expression
    :param format: optional explicit strptime format; parse failure is fatal
    :return: Date instance, or None for None input
    :raises Exception: via Log.error when no interpretation succeeds
    """
    # http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    if value == None:
        return None

    if format != None:
        try:
            # pad missing fractional seconds so a %S.%f format still matches
            if format.endswith("%S.%f") and "." not in value:
                value += ".000"
            return _unix2Date(datetime2unix(datetime.strptime(value, format)))
        except Exception as e:
            from mo_logs import Log

            Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)

    value = value.strip()
    lower = value.lower()

    # keyword dates
    if lower == "now":
        return _unix2Date(datetime2unix(_utcnow()))
    if lower == "today":
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400)
    if lower in ["eod", "tomorrow"]:
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400 + 86400)

    # relative time expressions, e.g. "now-3day"
    keywords = ["now", "today", "eod", "tomorrow"] + list(MILLI_VALUES.keys())
    if any(lower.find(k) >= 0 for k in keywords):
        return parse_time_expression(value)

    try:
        # 2.7 DOES NOT SUPPORT %z; let the generic parser handle tz offsets
        with_tz = parse_date(value)  # eg 2014-07-16 10:57 +0200
        return _unix2Date(datetime2unix((with_tz - with_tz.utcoffset()).replace(tzinfo=None)))
    except Exception as e:
        e = Except.wrap(e)  # FOR DEBUGGING

    # strict ISO-like formats
    for strict_format in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
        try:
            return _unix2Date(datetime2unix(datetime.strptime(value, strict_format)))
        except Exception:
            pass

    # last resort: strip punctuation and try a battery of loose formats
    loose_formats = [
        "%Y-%m",
        "%Y%m%d",
        "%d%m%Y",
        "%d%m%y",
        "%d%b%Y",
        "%d%b%y",
        "%d%B%Y",
        "%d%B%y",
        "%Y%m%d%H%M%S",
        "%Y%m%dT%H%M%S",
        "%d%m%Y%H%M%S",
        "%d%m%y%H%M%S",
        "%d%b%Y%H%M%S",
        "%d%b%y%H%M%S",
        "%d%B%Y%H%M%S",
        "%d%B%y%H%M%S"
    ]
    value = deformat(value)
    for loose_format in loose_formats:
        try:
            return unicode2Date(value, format=loose_format)
        except Exception:
            pass

    from mo_logs import Log

    Log.error("Can not interpret {{value}} as a datetime", value=value)
def _db_worker(self, please_stop):
    """
    Background loop: pull column metadata updated since self.last_load
    from the database, then push locally-queued column changes back.

    :param please_stop: signal; loop exits when set
    """
    while not please_stop:
        try:
            # PULL: fetch columns changed since our last load
            with self._db_transaction():
                result = self._query(
                    SQL_SELECT
                    + all_columns
                    + SQL_FROM
                    + db_table_name
                    + SQL_WHERE
                    + "last_updated > "
                    + quote_value(self.last_load)
                    + SQL_ORDERBY
                    + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                )

            with self.locker:
                for r in result.data:
                    c = row_to_column(result.header, r)
                    self._add(c)
                    # advance the high-water mark
                    if c.last_updated > self.last_load:
                        self.last_load = c.last_updated

            # PUSH: apply queued local changes to the database
            updates = self.todo.pop_all()
            DEBUG and updates and Log.note(
                "{{num}} columns to push to db", num=len(updates)
            )
            for action, column in updates:
                # retry loop: repeats while the database is busy
                while not please_stop:
                    try:
                        with self._db_transaction():
                            DEBUG and Log.note(
                                "{{action}} db for {{table}}.{{column}}",
                                action=action,
                                table=column.es_index,
                                column=column.es_column,
                            )
                            if action is EXECUTE:
                                self.db.execute(column)
                            elif action is UPDATE:
                                self.db.execute(
                                    "UPDATE"
                                    + db_table_name
                                    + "SET"
                                    + sql_list(
                                        [
                                            "count=" + quote_value(column.count),
                                            "cardinality=" + quote_value(column.cardinality),
                                            "multi=" + quote_value(column.multi),
                                            "partitions=" + quote_value(
                                                value2json(column.partitions)
                                            ),
                                            "last_updated=" + quote_value(column.last_updated),
                                        ]
                                    )
                                    + SQL_WHERE
                                    + SQL_AND.join(
                                        [
                                            "es_index = " + quote_value(column.es_index),
                                            "es_column = " + quote_value(column.es_column),
                                            # only overwrite older rows
                                            "last_updated < " + quote_value(column.last_updated),
                                        ]
                                    )
                                )
                            elif action is DELETE:
                                self.db.execute(
                                    "DELETE FROM"
                                    + db_table_name
                                    + SQL_WHERE
                                    + SQL_AND.join(
                                        [
                                            "es_index = " + quote_value(column.es_index),
                                            "es_column = " + quote_value(column.es_column),
                                        ]
                                    )
                                )
                            else:
                                self._db_insert_column(column)
                        break
                    except Exception as e:
                        e = Except.wrap(e)
                        if "database is locked" in e:
                            # NOTE(review): waits 1s then breaks (abandons this
                            # update rather than retrying) — confirm intended
                            Log.note("metadata database is locked")
                            Till(seconds=1).wait()
                            break
                        else:
                            Log.warning("problem updataing database", cause=e)
        except Exception as e:
            Log.warning("problem updating database", cause=e)

        (Till(seconds=10) | please_stop).wait()