def process(source_key, source, destination, resources, please_stop=None): lines = source.read_lines() etl_header = convert.json2value(lines[0]) if etl_header.etl: start = 0 elif etl_header.locale or etl_header._meta: start = 0 else: start = 1 keys = [] records = [] stats = Dict() for i, line in enumerate(lines[start:]): pulse_record = Null try: pulse_record = scrub_pulse_record(source_key, i, line, stats) if not pulse_record: continue with Profiler("transform_buildbot"): record = transform_buildbot(pulse_record.payload, resources=resources) record.etl = { "id": i, "source": pulse_record.etl, "type": "join", "revision": get_git_revision() } key = etl2key(record.etl) keys.append(key) records.append({"id": key, "value": record}) except Exception, e: Log.warning("Problem with pulse payload {{pulse|json}}", pulse=pulse_record.payload, cause=e)
def event_loop(self, please_stop): got_stop_message = False while not please_stop.is_go(): with Timer("get more work", debug=DEBUG): request = self.in_queue.pop() if request == Thread.STOP: if DEBUG: Log.note("{{name}} got a stop message", name=self.name) got_stop_message = True if self.in_queue: Log.warning( "programmer error, queue not empty. {{num}} requests lost:\n{{requests}}", num=len(self.in_queue.queue), requests=list(self.in_queue.queue)[:5:] + list(self.in_queue.queue)[-5::], ) break if please_stop.is_go(): break with Timer("run {{function}}", {"function": get_function_name(self.function)}, debug=DEBUG): try: result = self.function(**request) if self.out_queue != None: self.out_queue.add({"response": result}) except Exception, e: Log.warning("Can not execute with params={{params}}", params=request, cause=e) if self.out_queue != None: self.out_queue.add({"exception": e}) finally: self.num_runs += 1
def _worker(self, please_stop): curr = "0.0" acc = [] last_count_written = -1 next_write = Date.now() while not please_stop: d = self.temp_queue.pop(timeout=MINUTE) if d == None: if not acc: continue # WRITE THE INCOMPLETE DATA TO S3, BUT NOT TOO OFTEN next_write = Date.now() + MINUTE try: if last_count_written != len(acc): if DEBUG: Log.note("write incomplete data ({{num}} lines) to {{uid}} in S3 next (time = {{next_write}})", uid=curr, next_write=next_write, num=len(acc)) self.bucket.write_lines(curr, (convert.value2json(a) for a in acc)) last_count_written = len(acc) except Exception, e: Log.note("Problem with write to S3", cause=e) elif d[UID_PATH] != curr: # WRITE acc TO S3 IF WE ARE MOVING TO A NEW KEY try: if acc: if DEBUG: Log.note("write complete data ({{num}} lines) to {{curr}} in S3", num=len(acc), curr=curr) self.bucket.write_lines(curr, (convert.value2json(a) for a in acc)) last_count_written = 0 curr = d[UID_PATH] acc = [d] except Exception, e: Log.warning("Can not store data", cause=e) Thread.sleep(30*MINUTE)
def loop(self, please_stop): with self.work_queue: while not please_stop: if self.settings.wait_forever: todo = None while not please_stop and not todo: if isinstance(self.work_queue, aws.Queue): todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME) else: todo = self.work_queue.pop() else: if isinstance(self.work_queue, aws.Queue): todo = self.work_queue.pop() else: todo = self.work_queue.pop(till=Date.now()) if todo == None: please_stop.go() return try: is_ok = self._dispatch_work(todo) if is_ok: self.work_queue.commit() else: self.work_queue.rollback() except Exception, e: self.work_queue.rollback() Log.warning( "could not processs {{key}}. Returned back to work queue.", key=todo.key, cause=e)
def _get_from_elasticsearch(self, revision, locale=None): rev = revision.changeset.id query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": rev[0:12]}}, {"term": {"branch.name": revision.branch.name}}, {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}} ]} }}, "size": 2000, } try: docs = self.es.search(query, timeout=120).hits.hits if len(docs) > 1: for d in docs: if d._id.endswith(d._source.branch.locale): return d._source Log.warning("expecting no more than one document") return docs[0]._source except Exception, e: Log.warning("Bad ES call", e) return None
def monitor(self, please_stop): please_stop.on_go(lambda: self.todo.add(Thread.STOP)) while not please_stop: try: if not self.todo: with self.columns.locker: old_columns = filter( lambda c: (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.type not in ["object", "nested"], self.columns ) if old_columns: self.todo.extend(old_columns) # TEST CONSISTENCY for c, d in product(list(self.todo.queue), list(self.todo.queue)): if c.abs_name==d.abs_name and c.table==d.table and c!=d: Log.error("") else: Log.note("no more metatdata to update") column = self.todo.pop(timeout=10*MINUTE) if column: if column.type in ["object", "nested"]: continue elif column.last_updated >= Date.now()-TOO_OLD: continue try: self._update_cardinality(column) Log.note("updated {{column.name}}", column=column) except Exception, e: Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e) except Exception, e: Log.warning("problem in cardinality monitor", cause=e)
def record_request(request, query_, data, error): try: if request_log_queue == None: return if data and len(data) > 10000: data = data[:10000] log = wrap({ "timestamp": Date.now(), "http_user_agent": request.headers.get("user_agent"), "http_accept_encoding": request.headers.get("accept_encoding"), "path": request.headers.environ["werkzeug.request"].full_path, "content_length": request.headers.get("content_length"), "remote_addr": request.remote_addr, "query": query_, "data": data, "error": error }) log["from"] = request.headers.get("from") request_log_queue.add({"value": log}) except Exception, e: Log.warning("Can not record", cause=e)
def main(): try: settings = startup.read_settings() constants.set(settings.constants) Log.start(settings.debug) aws_args = dict( region_name=settings.aws.region, aws_access_key_id=unwrap(settings.aws.aws_access_key_id), aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key) ) ec2_conn = boto_ec2.connect_to_region(**aws_args) instances = _get_managed_instances(ec2_conn, settings.name) for i in instances: Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", insance_id=i.id, name=i.tags["Name"], ip=i.ip_address) _config_fabric(settings.fabric, i) try: _refresh_etl() # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST except Exception, e: ec2_conn.terminate_instances([i.id]) Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e) except Exception, e: Log.error("Problem with etl", e)
def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None): """ :param keys: THE KEYS TO LOAD FROM source :param source: THE SOURCE (USUALLY S3 BUCKET) :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING :param sample_size: FOR RANDOM SAMPLE OF THE source DATA :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION :return: LIST OF SUB-keys PUSHED INTO ES """ num_keys = 0 queue = None for key in keys: timer = Timer("key") try: with timer: for rownum, line in enumerate(source.read_lines(strip_extension(key))): if not line: continue if rownum > 0 and rownum % 1000 == 0: Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name) row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size) num_keys += 1 if queue == None: queue = self._get_queue(row) queue.add(row) if please_stop: break except Exception, e: done_copy = None Log.warning("Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e)
def time_delta_pusher(please_stop, appender, queue, interval): """ appender - THE FUNCTION THAT ACCEPTS A STRING queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE interval - timedelta USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL """ next_run = time() + interval while not please_stop: (Till(till=next_run) | please_stop).wait() next_run = time() + interval logs = queue.pop_all() if not logs: continue lines = [] for log in logs: try: if log is Thread.STOP: please_stop.go() next_run = time() else: expanded = expand_template(log.get("template"), log.get("params")) lines.append(expanded) except Exception, e: Log.warning("Trouble formatting logs", cause=e) # SWALLOW ERROR, GOT TO KEEP RUNNING try: appender(u"\n".join(lines) + u"\n") except Exception, e: sys.stderr.write(b"Trouble with appender: " + str(e.message) + b"\n")
def write(self, template, params): try: with self.file_lock: self.file.append(expand_template(template, params)) except Exception, e: _Log.warning("Problem writing to file {{file}}, waiting...", file=file.name, cause=e) _Till(seconds=5).wait()
def main(): """ CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE """ settings = startup.read_settings(defs=[ { "name": ["--bucket"], "help": "bucket to scan", "type": str, "dest": "bucket", "required": True } ]) Log.start(settings.debug) source = Connection(settings.aws).get_bucket(settings.args.bucket) for k in qb.sort(source.keys()): try: data = source.read_bytes(k) if convert.ascii2unicode(data).find("2e2834fa7ecd8d3bb1ad49ec981fdb89eb4df95e18") >= 0: Log.note("Found at {{key}}", key=k) except Exception, e: Log.warning("Problem with {{key}}", key=k, cause=e) finally:
def replacer(found): ops = found.group(1).split("|") path = ops[0] var = path.lstrip(".") depth = min(len(seq), max(1, len(path) - len(var))) try: val = seq[-depth] if var: val = val[var] for func_name in ops[1:]: parts = func_name.split('(') if len(parts) > 1: val = eval(parts[0] + "(val, " + ("(".join(parts[1::]))) else: val = globals()[func_name](val) val = toString(val) return val except Exception, e: try: if e.message.find("is not JSON serializable"): # WORK HARDER val = toString(val) return val except Exception, f: if not _Log: _late_import() _Log.warning("Can not expand " + "|".join(ops) + " in template: {{template_|json}}", template_=template, cause=e)
def replacer(found): ops = found.group(1).split("|") path = ops[0] var = path.lstrip(".") depth = min(len(seq), max(1, len(path) - len(var))) try: val = seq[-depth] if var: val = val[var] for func_name in ops[1:]: parts = func_name.split('(') if len(parts) > 1: val = eval(parts[0] + "(val, " + ("(".join(parts[1::]))) else: val = globals()[func_name](val) val = toString(val) return val except Exception, e: try: if e.message.find("is not JSON serializable"): # WORK HARDER val = toString(val) return val except Exception, f: if not _Log: _late_import() _Log.warning( "Can not expand " + "|".join(ops) + " in template: {{template_|json}}", template_=template, cause=e )
def main(): try: settings = startup.read_settings() Log.start(settings.debug) constants.set(settings.constants) with startup.SingleInstance(flavor_id=settings.args.filename): with aws.s3.Bucket(settings.destination) as bucket: if settings.param.debug: if settings.source.durable: Log.error("Can not run in debug mode with a durable queue") synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False)) else: synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False)) if settings.source.durable: synch.startup() queue = PersistentQueue(settings.param.queue_file) if queue: last_item = queue[len(queue) - 1] synch.source_key = last_item._meta.count + 1 with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key): Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket) Thread.wait_for_shutdown_signal(allow_exit=True) Log.warning("starting shutdown") queue.close() Log.note("write shutdown state to S3") synch.shutdown() except Exception, e: Log.error("Problem with etl", e)
def write_lines(self, key, lines): self._verify_key_format(key) storage = self.bucket.new_key(key + ".json.gz") buff = TemporaryFile() archive = gzip.GzipFile(fileobj=buff, mode='w') count = 0 for l in lines: if hasattr(l, "__iter__"): for ll in l: archive.write(ll.encode("utf8")) archive.write(b"\n") count += 1 else: archive.write(l.encode("utf8")) archive.write(b"\n") count += 1 archive.close() file_length = buff.tell() retry = 3 while retry: try: with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, debug=self.settings.debug): buff.seek(0) storage.set_contents_from_file(buff) break except Exception, e: Log.warning("could not push data to s3", cause=e) retry -= 1
def go(self): """ ACTIVATE SIGNAL (DOES NOTHING IF SIGNAL IS ALREADY ACTIVATED) """ if DEBUG: if not _Log: _late_import() _Log.note("GO! {{name|quote}}", name=self.name) with self.lock: if DEBUG: _Log.note("internal GO! {{name|quote}}", name=self.name) if self._go: return self._go = True jobs, self.job_queue = self.job_queue, [] threads, self.waiting_threads = self.waiting_threads, [] for t in threads: if DEBUG: _Log.note("Release") t.release() for j in jobs: try: j() except Exception, e: if not _Log: _late_import() _Log.warning("Trigger on Signal.go() failed!", cause=e)
def pypy_json_encode(value, pretty=False): """ pypy DOES NOT OPTIMIZE GENERATOR CODE WELL """ global _dealing_with_problem if pretty: return pretty_json(value) try: _buffer = UnicodeBuilder(2048) _value2json(value, _buffer) output = _buffer.build() return output except Exception, e: # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS from pyLibrary.debugs.logs import Log if _dealing_with_problem: Log.error("Serialization of JSON problems", e) else: Log.warning("Serialization of JSON problems", e) _dealing_with_problem = True try: return pretty_json(value) except Exception, f: Log.error("problem serializing object", f)
def daemon(cls, please_stop): global next_ping Till.enabled = True try: while not please_stop: now = _time() with Till.locker: if next_ping > now: _sleep(min(next_ping - now, INTERVAL)) continue next_ping = now + INTERVAL work = None if Till.all_timers: Till.all_timers.sort(key=lambda r: r[0]) for i, (t, s) in enumerate(Till.all_timers): if now < t: work, Till.all_timers[:i] = Till.all_timers[:i], [] next_ping = min(next_ping, Till.all_timers[0][0]) break else: work, Till.all_timers = Till.all_timers, [] if work: for t, s in work: s.go() except Exception, e: from pyLibrary.debugs.logs import Log Log.warning("timer shutdown", cause=e)
def solve(): try: data = convert.json2value(convert.utf82unicode(flask.request.data)) solved = noop.solve(data) response_data = convert.unicode2utf8(convert.value2json(solved)) return Response( response_data, direct_passthrough=True, # FOR STREAMING status=200, headers={ "access-control-allow-origin": "*", "content-type": "application/json" } ) except Exception, e: e = Except.wrap(e) Log.warning("Could not process", cause=e) e = e.as_dict() return Response( convert.unicode2utf8(convert.value2json(e)), status=400, headers={ "access-control-allow-origin": "*", "content-type": "application/json" } )
def _insert_loop(self, please_stop=None): bad_count = 0 while not please_stop: try: Thread.sleep(seconds=1) messages = wrap(self.queue.pop_all()) if not messages: continue for g, mm in jx.groupby(messages, size=self.batch_size): scrubbed = [] try: for i, message in enumerate(mm): if message is Thread.STOP: please_stop.go() return scrubbed.append(_deep_json_to_string(message, depth=3)) finally: self.es.extend(scrubbed) bad_count = 0 except Exception, e: Log.warning("Problem inserting logs into ES", cause=e) bad_count += 1 if bad_count > MAX_BAD_COUNT: Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index) Thread.sleep(seconds=30)
def _leap_logging(clock_time, utc_time): global _Log global _leap_log diff = clock_time - LEAP_SECOND with _leap_lock: if not _Log: from pyLibrary.debugs.logs import Log as _Log _Log.warning("preparing for leap seconds near {{date|datetime}}", date=LEAP_SECOND) if abs(diff) < 30: # RECORD AROUND THE LEAP SECOND with _leap_lock: _leap_log.append((clock_time, utc_time)) if diff > 60 and _leap_log: with _leap_lock: temp, _leap_log = _leap_log, Null if temp: clocks, utcs = zip(*temp) _Log.warning( "Leap second logging results\ntime.time\n{{clocks|json}}\ntime.time diff\n{{clocks_diff|json}}\nutcnow\n{{utc|json}}\nutc diff\n{{utc_diff|json}}", clocks=clocks, clocks_diff=[b - a for a, b in zip(clocks, clocks[1:])], utc=utcs, utc_diff=[b - a for a, b in zip(utcs, utcs[1:])] )
def _worker(self, please_stop): while not please_stop: try: self.pulse.listen() except Exception, e: if not please_stop: Log.warning("pulse had problem", e)
def loop(self, please_stop): with self.work_queue: while not please_stop: if self.settings.wait_forever: todo = None while not please_stop and not todo: if isinstance(self.work_queue, aws.Queue): todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME) else: todo = self.work_queue.pop() else: if isinstance(self.work_queue, aws.Queue): todo = self.work_queue.pop() else: todo = self.work_queue.pop(till=Date.now()) if todo == None: please_stop.go() return try: is_ok = self._dispatch_work(todo) if is_ok: self.work_queue.commit() else: self.work_queue.rollback() except Exception, e: self.work_queue.rollback() Log.warning("could not processs {{key}}. Returned back to work queue.", key=todo.key, cause=e)
def __init__(self, host, index, type="query", max_size=10, batch_size=10, settings=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ es = Cluster(settings).get_or_create_index(schema=convert.json2value( convert.value2json(SCHEMA), leaves=True), limit_replicas=True, settings=settings) #ENSURE THE TYPE EXISTS FOR PROBING try: es.add({ "id": "dummy", "value": { "hash": "dummy", "create_time": Date.now(), "last_used": Date.now(), "query": {} } }) except Exception, e: Log.warning("Problem saving query", cause=e)
def get_treeherder_job(self): try: with Timer("Process Request"): args = Dict(**flask.request.args) # IS THE branch/revision PENDING? result = self.get_markup(unwraplist(args.branch), unwraplist(args.revision), unwraplist(args.task_id), unwraplist(args.buildername), unwraplist(args.timestamp)) response_data = convert.unicode2utf8( convert.value2json(result)) return Response(response_data, status=200, headers={ "access-control-allow-origin": "*", "content-type": "text/plain" }) except Exception, e: e = Except.wrap(e) Log.warning("Could not process", cause=e) e = e.as_dict() return Response(convert.unicode2utf8(convert.value2json(e)), status=400, headers={ "access-control-allow-origin": "*", "content-type": "application/json" })
def etl_one(settings): queue = Queue("temp work queue") queue.__setattr__(b"commit", Null) queue.__setattr__(b"rollback", Null) settings.param.wait_forever = False already_in_queue = set() for w in settings.workers: source = get_container(w.source) # source.settings.fast_forward = True if id(source) in already_in_queue: continue try: for i in parse_id_argument(settings.args.id): data = source.get_key(i) if data != None: already_in_queue.add(id(source)) queue.add(Dict( bucket=w.source.bucket, key=i )) except Exception, e: if "Key {{key}} does not exist" in e: already_in_queue.add(id(source)) queue.add(Dict( bucket=w.source.bucket, key=settings.args.id )) Log.warning("Problem", cause=e)
def pypy_json_encode(value, pretty=False): """ pypy DOES NOT OPTIMIZE GENERATOR CODE WELL """ global _dealing_with_problem if pretty: return pretty_json(value) try: _buffer = UnicodeBuilder(1024) _value2json(value, _buffer) output = _buffer.build() return output except Exception, e: # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS from pyLibrary.debugs.logs import Log if _dealing_with_problem: Log.error("Serialization of JSON problems", e) else: Log.warning("Serialization of JSON problems", e) _dealing_with_problem = True try: return pretty_json(value) except Exception, f: Log.error("problem serializing object", f)
def add_log(self, logger): if logger == None: if not _Log: _delayed_imports() _Log.warning("Expecting a non-None logger") self.many.append(logger) return self
def _kill(self): try: self.service.kill() except Exception, e: ee = Except.wrap(e) if "The operation completed successfully" in ee: return Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
def _delete_old_indexes(self, candidates): for c in candidates: timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S") if timestamp + self.rollover_interval < Date.today() - self.rollover_max: # Log.warning("Will delete {{index}}", index=c.index) try: self.cluster.delete_index(c.index) except Exception, e: Log.warning("could not delete index {{index}}", index=c.index, cause=e)
def log_loop(settings, synch, queue, bucket, please_stop): with aws.Queue(settings.work_queue) as work_queue: for i, g in qb.groupby(queue, size=settings.param.size): Log.note( "Preparing {{num}} pulse messages to bucket={{bucket}}", num=len(g), bucket=bucket.name ) full_key = unicode(synch.next_key) + ":" + unicode(MIN(g.select("_meta.count"))) try: output = [ set_default( d, {"etl": { "name": "Pulse block", "bucket": settings.destination.bucket, "timestamp": Date.now().unix, "id": synch.next_key, "source": { "name": "pulse.mozilla.org", "id": d._meta.count, "count": d._meta.count, "message_id": d._meta.message_id, "sent": Date(d._meta.sent), }, "type": "aggregation" }} ) for i, d in enumerate(g) if d != None # HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start ] bucket.write(full_key, "\n".join(convert.value2json(d) for d in output)) synch.advance() synch.source_key = MAX(g.select("_meta.count")) + 1 now = Date.now() work_queue.add({ "bucket": bucket.name, "key": full_key, "timestamp": now.unix, "date/time": now.format() }) synch.ping() queue.commit() Log.note("Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}} ", num= len(g), bucket= bucket.name, key= full_key) except Exception, e: queue.rollback() if not queue.closed: Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e) if please_stop: break
def _shorten(value, source): value.result.subtests = [s for s in value.result.subtests if s.ok is False] value.result.missing_subtests = True if source.name.startswith("active-data-test-result"): value.repo.changeset.files=None shorter_length = len(convert.value2json(value)) if shorter_length > MAX_RECORD_LENGTH: Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length)
def now(): candidate = _time() temp = datetime.utcnow() unix = datetime2unix(temp) if abs(candidate - unix) > 0.1: from pyLibrary.debugs.logs import Log Log.warning("_time() and datetime.utcnow() is off by {{amount}}", amount=candidate - unix) return unix2Date(datetime2unix(temp))
def __getitem__(self, item): c = self.schema.get(item) if c: return c e = self.edges[item] if not c: Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name) return e
def setup(settings=None): global config try: config = startup.read_settings(defs={ "name": ["--process_num", "--process"], "help": "Additional port offset (for multiple Flask processes", "type": int, "dest": "process_num", "default": 0, "required": False }, filename=settings) constants.set(config.constants) Log.start(config.debug) if config.args.process_num and config.flask.port: config.flask.port += config.args.process_num # PIPE REQUEST LOGS TO ES DEBUG if config.request_logs: request_logger = elasticsearch.Cluster( config.request_logs).get_or_create_index(config.request_logs) active_data.request_log_queue = request_logger.threaded_queue( max_size=2000) # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY containers.config.default = { "type": "elasticsearch", "settings": config.elasticsearch.copy() } # TURN ON /exit FOR WINDOWS DEBUGGING if config.flask.debug or config.flask.allow_exit: config.flask.allow_exit = None Log.warning("ActiveData is in debug mode") app.add_url_rule('/exit', 'exit', _exit) # TRIGGER FIRST INSTANCE FromESMetadata(config.elasticsearch) if config.saved_queries: setattr(save_query, "query_finder", SaveQueries(config.saved_queries)) HeaderRewriterFix(app, remove_headers=['Date', 'Server']) if config.flask.ssl_context: if config.args.process_num: Log.error( "can not serve ssl and multiple Flask instances at once") setup_ssl() return app except Exception, e: Log.error( "Serious problem with ActiveData service construction! Shutdown!", cause=e)
def __eq__(self, other): Log.warning("expensive") from pyLibrary.testing.fuzzytestcase import assertAlmostEqual try: assertAlmostEqual(convert.json2value(self.json), other) return True except Exception: return False
def __getslice__(self, i, j): global _emit_slice_warning if _emit_slice_warning: _emit_slice_warning=False if not _Log: _late_import() _Log.warning("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)") return self[i:j:]
def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_val, Exception): from pyLibrary.debugs.logs import Log Log.warning(template="Ignored failure while " + self.template, default_params=self.more_params, cause=exc_val, stack_depth=1) return True
class TextLog_usingElasticSearch(TextLog): @use_settings def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ self.es = Cluster(settings).get_or_create_index( schema=convert.json2value(convert.value2json(SCHEMA), leaves=True), limit_replicas=True, tjson=True, settings=settings ) self.batch_size = batch_size self.es.add_alias(coalesce(settings.alias, settings.index)) self.queue = Queue("debug logs to es", max=max_size, silent=True) self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3) self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE)) Thread.run("add debug logs to es", self._insert_loop) def write(self, template, params): if params.get("template"): # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE self.queue.add({"value": params}) else: template = strings.limit(template, 2000) self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE) return self def _insert_loop(self, please_stop=None): bad_count = 0 while not please_stop: try: Thread.sleep(seconds=1) messages = wrap(self.queue.pop_all()) if messages: # for m in messages: # m.value.params = leafer(m.value.params) # m.value.error = leafer(m.value.error) for g, mm in jx.groupby(messages, size=self.batch_size): self.es.extend(mm) bad_count = 0 except Exception, e: Log.warning("Problem inserting logs into ES", cause=e) bad_count += 1 if bad_count > 5: break Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index) # CONTINUE TO DRAIN THIS QUEUE while not please_stop: try: Thread.sleep(seconds=1) self.queue.pop_all() except Exception, e: Log.warning("Should not happen", cause=e)
def _drain_events_loop(self): while True: try: self.connection.drain_events(timeout=self.timeout) except socket_timeout, e: Log.warning("timeout! Restarting pulse consumer.", cause=e) try: self.disconnect() except Exception, f: Log.warning("Problem with disconnect()", cause=f) break
def __exit__(self, exc_type, exc_val, exc_tb): Log.note("clean pulse exit") self.please_stop.go() with suppress_exception: self.target_queue.close() Log.note("stop put into queue") try: self.pulse.disconnect() except Exception, e: Log.warning("Can not disconnect during pulse exit, ignoring", e)
def __iter__(self): """ BLOCKING ITERATOR """ while not self.please_stop: try: value = self.pop() if value is not Thread.STOP: yield value except Exception, e: Log.warning("Tell me about what happened here", cause=e)
def output(*args, **kwargs): while True: try: return func(*args, **kwargs) except Exception, e: e = Except.wrap(e) if "Request limit exceeded" in e: Log.warning("AWS Problem", cause=e) continue else: Log.error("Problem with call to AWS", cause=e)
def create_index( self, index, alias=None, create_timestamp=None, schema=None, limit_replicas=None, read_only=False, tjson=False, settings=None ): if not alias: alias = settings.alias = settings.index index = settings.index = proto_name(alias, create_timestamp) if settings.alias == index: Log.error("Expecting index name to conform to pattern") if settings.schema_file: Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead') if schema == None: Log.error("Expecting a schema") elif isinstance(schema, basestring): schema = convert.json2value(schema, leaves=True) else: schema = convert.json2value(convert.value2json(schema), leaves=True) if limit_replicas: # DO NOT ASK FOR TOO MANY REPLICAS health = self.get("/_cluster/health") if schema.settings.index.number_of_replicas >= health.number_of_nodes: Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {"from": schema.settings.index.number_of_replicas}, to= health.number_of_nodes - 1 ) schema.settings.index.number_of_replicas = health.number_of_nodes - 1 self.post( "/" + index, data=schema, headers={"Content-Type": "application/json"} ) # CONFIRM INDEX EXISTS while True: try: state = self.get("/_cluster/state", retry={"times": 5}, timeout=3) if index in state.metadata.indices: break Log.note("Waiting for index {{index}} to appear", index=index) except Exception, e: Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e) Thread.sleep(seconds=1)
def _test_mode_wait(query): """ WAIT FOR METADATA TO ARRIVE ON INDEX :param query: dict() OF REQUEST BODY :return: nothing """ try: m = meta.singlton now = Date.now() end_time = now + MINUTE # MARK COLUMNS DIRTY m.meta.columns.update({ "clear": ["partitions", "count", "cardinality", "last_updated"], "where": { "eq": { "table": join_field(split_field(query["from"])[0:1]) } } }) # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION cols = [ c for c in m.get_columns(table_name=query["from"], force=True) if c.type not in STRUCT ] for c in cols: Log.note("Mark {{column}} dirty at {{time}}", column=c.name, time=now) c.last_updated = now - TOO_OLD m.todo.push(c) while end_time > now: # GET FRESH VERSIONS cols = [ c for c in m.get_columns(table_name=query["from"]) if c.type not in STRUCT ] for c in cols: if not c.last_updated or c.cardinality == None: Log.note( "wait for column (table={{col.table}}, name={{col.name}}) metadata to arrive", col=c) break else: break Thread.sleep(seconds=1) for c in cols: Log.note( "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}", column=c) except Exception, e: Log.warning("could not pickup columns", cause=e)
def close(self): if self.transaction_level > 0: Log.error("expecting commit() or rollback() before close") self.cursor = None # NOT NEEDED try: self.db.close() except Exception, e: if e.message.find("Already closed") >= 0: return Log.warning("can not close()", e)
def _exit(): Log.note("Got request to shutdown") shutdown = flask.request.environ.get('werkzeug.server.shutdown') if shutdown: shutdown() else: Log.warning("werkzeug.server.shutdown does not exist") return Response(convert.unicode2utf8(OVERVIEW), status=400, headers={"Content-Type": "text/html"})
def __getslice__(self, i, j): global _emit_slice_warning if _emit_slice_warning: _emit_slice_warning = False if not _Log: _late_import() _Log.warning( "slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)" ) return self[i:j:]
def _delete_old_indexes(self, candidates): for c in candidates: timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S") if timestamp + self.rollover_interval < Date.today( ) - self.rollover_max: # Log.warning("Will delete {{index}}", index=c.index) try: self.cluster.delete_index(c.index) except Exception, e: Log.warning("could not delete index {{index}}", index=c.index, cause=e)
def __exit__(self, type, value, traceback): if self.readonly: self.close() return if isinstance(value, BaseException): try: if self.cursor: self.cursor.close() self.cursor = None self.rollback() except Exception, e: Log.warning(u"can not rollback()", cause=[value, e]) finally:
def _upgrade(): global _upgraded _upgraded = True try: import sys sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll") python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll") if python_dll.read_bytes() != sqlite_dll.read_bytes(): backup = sqlite_dll.backup() File.copy(python_dll, sqlite_dll) except Exception, e: Log.warning("could not upgrade python's sqlite", cause=e)
def set_attr(obj, path, value): """ SAME AS object.__setattr__(), BUT USES DOT-DELIMITED path RETURN OLD VALUE """ try: return _set_attr(obj, split_field(path), value) except Exception, e: from pyLibrary.debugs.logs import Log if PATH_NOT_FOUND in e: Log.warning(PATH_NOT_FOUND + ": {{path}}", path=path) else: Log.error("Problem setting value", e)
def _pinger(self, please_stop): Log.note("pinger started") while not please_stop: Thread.sleep(till=self.ping_time + PING_PERIOD, please_stop=please_stop) if please_stop: #EXIT EARLY, OTHERWISE WE MAY OVERWRITE THE shutdown break if Date.now() < self.ping_time + PING_PERIOD: continue try: self.ping() except Exception, e: Log.warning("synchro.py could not ping", e)