class TextLog_usingLogger(TextLog):
    def __init__(self, settings):
        self.logger = logging.Logger("unique name", level=logging.INFO)
        self.logger.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue("queue for classic logger", max=10000, silent=True)
        self.thread = Thread(
            "pushing to classic logger",
            time_delta_pusher,
            appender=self.logger.info,
            queue=self.queue,
            interval=0.3
        )
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        self.queue.add({"template": template, "params": params})

    def stop(self):
        with suppress_exception:
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()

        with suppress_exception:
            self.queue.close()
def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:
                if settings.param.debug:
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()
    except Exception, e:
        Log.error("Problem with etl", e)
def __init__(self, name, in_queue, out_queue, function):
    Thread.__init__(self, name, self.event_loop)
    self.in_queue = in_queue
    self.out_queue = out_queue
    self.function = function
    self.num_runs = 0
    self.start()
class TextLog_usingThread(TextLog):
    def __init__(self, logger):
        if not _Log:
            _delayed_imports()

        self.queue = _Queue("logs", max=10000, silent=True)
        self.logger = logger

        def worker(please_stop):
            while not please_stop:
                Thread.sleep(1)
                logs = self.queue.pop_all()
                for log in logs:
                    if log is Thread.STOP:
                        if DEBUG_LOGGING:
                            sys.stdout.write("TextLog_usingThread.worker() sees stop, filling rest of queue\n")
                        please_stop.go()
                    else:
                        self.logger.write(**log)

        self.thread = Thread("log thread", worker)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            e = _Except.wrap(e)
            sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
            raise e  # OH NO!
class Log_usingThread(BaseLog):
    def __init__(self, logger):
        # DELAYED LOAD FOR THREADS MODULE
        from pyLibrary.thread.threads import Queue

        self.queue = Queue("logs", max=10000, silent=True)
        self.logger = logger

        def worker(please_stop):
            while not please_stop:
                Thread.sleep(1)
                logs = self.queue.pop_all()
                for log in logs:
                    if log is Thread.STOP:
                        if DEBUG_LOGGING:
                            sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                        please_stop.go()
                    else:
                        self.logger.write(**log)

        self.thread = Thread("log thread", worker)
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
            raise e  # OH NO!
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id(s) to process. Use \"..\" for a range.",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))

        stopper = Signal()
        for i in range(coalesce(settings.param.threads, 1)):
            ETL(
                name="ETL Loop " + unicode(i),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    except Exception, e:
        Log.error("Problem with etl", e)
def get_columns(self, table_name, column_name=None, force=False):
    """
    RETURN METADATA COLUMNS
    """
    try:
        # LAST TIME WE GOT INFO FOR THIS TABLE
        short_name = join_field(split_field(table_name)[0:1])
        table = self.get_table(short_name)[0]

        if not table:
            table = Table(
                name=short_name,
                url=None,
                query_path=None,
                timestamp=Date.now()
            )
            with self.meta.tables.locker:
                self.meta.tables.add(table)
            self._get_columns(table=short_name)
        elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
            table.timestamp = Date.now()
            self._get_columns(table=short_name)

        with self.meta.columns.locker:
            columns = self.meta.columns.find(table_name, column_name)
        if columns:
            columns = jx.sort(columns, "name")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
                Log.note("waiting for columns to update {{columns|json}}", columns=[c.table + "." + c.es_column for c in columns if not c.last_updated])
                Thread.sleep(seconds=1)
            return columns
    except Exception, e:
        Log.error("Not expected", cause=e)
class TextLog_usingLogger(TextLog):
    def __init__(self, settings):
        self.logger = logging.Logger("unique name", level=logging.INFO)
        self.logger.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue("log to classic logger", max=10000, silent=True)
        self.thread = Thread("log to logger", time_delta_pusher, appender=self.logger.info, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        self.queue.add({"template": template, "params": params})

    def stop(self):
        with suppress_exception:
            if DEBUG_LOGGING:
                sys.stdout.write("TextLog_usingLogger sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("TextLog_usingLogger done\n")

        with suppress_exception:
            self.queue.close()
def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
    global _elasticsearch
    if hasattr(self, "settings"):
        return

    from pyLibrary.queries.containers.lists import ListContainer
    from pyLibrary.env import elasticsearch as _elasticsearch

    self.settings = settings
    self.default_name = coalesce(name, alias, index)
    self.default_es = _elasticsearch.Cluster(settings=settings)
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.meta = Dict()
    table_columns = metadata_tables()
    column_columns = metadata_columns()
    self.meta.tables = ListContainer("meta.tables", [], wrap({c.name: c for c in table_columns}))
    self.meta.columns = ListContainer("meta.columns", [], wrap({c.name: c for c in column_columns}))
    self.meta.columns.insert(column_columns)
    self.meta.columns.insert(table_columns)

    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return
def test_queue_speed(self):
    SCALE = 1000 * 10

    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = q.pop()
            if item is Thread.STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(Thread.STOP)
        Log.note("Done insert")
        done.wait()

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
def _insert_loop(self, please_stop=None):
    bad_count = 0
    while not please_stop:
        try:
            Thread.sleep(seconds=1)
            messages = wrap(self.queue.pop_all())
            if not messages:
                continue

            for g, mm in jx.groupby(messages, size=self.batch_size):
                scrubbed = []
                try:
                    for i, message in enumerate(mm):
                        if message is Thread.STOP:
                            please_stop.go()
                            return
                        scrubbed.append(_deep_json_to_string(message, depth=3))
                finally:
                    self.es.extend(scrubbed)
                bad_count = 0
        except Exception, e:
            Log.warning("Problem inserting logs into ES", cause=e)
            bad_count += 1
            if bad_count > MAX_BAD_COUNT:
                Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
            Thread.sleep(seconds=30)
def __init__(self, logger):
    # DELAYED LOAD FOR THREADS MODULE
    from pyLibrary.thread.threads import Queue

    self.queue = Queue("logs", max=10000, silent=True)
    self.logger = logger

    def worker(please_stop):
        while not please_stop:
            Thread.sleep(1)
            logs = self.queue.pop_all()
            for log in logs:
                if log is Thread.STOP:
                    if DEBUG_LOGGING:
                        sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                    please_stop.go()
                else:
                    self.logger.write(**log)

    self.thread = Thread("log thread", worker)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def __init__(self, name, params, cwd=None, env=None, debug=False):
    self.name = name
    self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
    self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
    self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
    self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

    try:
        self.debug = debug or DEBUG
        self.service = service = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=-1,
            cwd=cwd,
            env=env
        )

        self.stopper = Signal()
        self.stopper.on_go(self._kill)
        self.thread_locker = Lock()
        self.children = [
            Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
            Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
            # Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
        ]
    except Exception, e:
        Log.error("Can not call", e)
def get_columns(self, table_name, column_name=None, fail_when_not_found=False):
    """
    RETURN METADATA COLUMNS
    """
    try:
        with self.meta.columns.locker:
            columns = [c for c in self.meta.columns.data if c.table == table_name and (column_name is None or c.name == column_name)]
        if columns:
            columns = jx.sort(columns, "name")
            if fail_when_not_found:
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}", columns=[c.table + "." + c.es_column for c in columns if not c.last_updated])
                    Thread.sleep(seconds=1)
                return columns
            elif all(columns.get("last_updated")):
                return columns
    except Exception, e:
        Log.error("Not expected", cause=e)
def __init__(
    self,
    exchange,  # name of the Pulse exchange
    topic,  # message name pattern to subscribe to ('#' is wildcard)
    target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
    target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
    host='pulse.mozilla.org',  # url to connect
    port=5671,  # tcp port
    user=None,
    password=None,
    vhost="/",
    start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
    ssl=True,
    applabel=None,
    heartbeat=False,  # True to also get the Pulse heartbeat message
    durable=False,  # True to keep queue after shutdown
    serializer='json',
    broker_timezone='GMT',
    settings=None
):
    self.target_queue = target_queue
    self.pulse_target = target
    if (target_queue == None and target == None) or (target_queue != None and target != None):
        Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

    Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
    self.settings = settings
    settings.callback = self._got_result
    settings.user = coalesce(settings.user, settings.username)
    settings.applabel = coalesce(settings.applable, settings.queue, settings.queue_name)
    settings.topic = topic

    self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
    self.count = coalesce(start, 0)
    self.start()
class Log_usingLogger(BaseLog):
    def __init__(self, settings):
        self.logger = logging.Logger("unique name", level=logging.INFO)
        self.logger.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue("log to classic logger", max=10000, silent=True)
        self.thread = Thread("log to logger", time_delta_pusher, appender=self.logger.info, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.start()

    def write(self, template, params):
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        self.queue.add({"template": template, "params": params})

    def stop(self):
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger done\n")
        except Exception, e:
            pass

        try:
            self.queue.close()
        except Exception, f:
            pass
def worker(please_stop):
    pending = Queue("pending ids", max=BATCH_SIZE * 3, silent=False)

    pending_thread = Thread.run(
        "get pending",
        get_pending,
        source=source,
        since=last_updated,
        pending_bugs=pending,
        please_stop=please_stop
    )
    diff_thread = Thread.run(
        "diff",
        diff,
        source,
        destination,
        pending,
        please_stop=please_stop
    )
    replication_thread = Thread.run(
        "replication",
        replicate,
        source,
        destination,
        pending,
        config.fix,
        please_stop=please_stop
    )

    pending_thread.join()
    diff_thread.join()
    pending.add(Thread.STOP)
    replication_thread.join()
    done.go()
    please_stop.go()
def _worker(self, please_stop):
    curr = "0.0"
    acc = []
    last_count_written = -1
    next_write = Date.now()

    while not please_stop:
        d = self.temp_queue.pop(timeout=MINUTE)
        if d == None:
            if not acc:
                continue

            # WRITE THE INCOMPLETE DATA TO S3, BUT NOT TOO OFTEN
            next_write = Date.now() + MINUTE
            try:
                if last_count_written != len(acc):
                    if DEBUG:
                        Log.note("write incomplete data ({{num}} lines) to {{uid}} in S3 next (time = {{next_write}})", uid=curr, next_write=next_write, num=len(acc))
                    self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                    last_count_written = len(acc)
            except Exception, e:
                Log.note("Problem with write to S3", cause=e)
        elif d[UID_PATH] != curr:
            # WRITE acc TO S3 IF WE ARE MOVING TO A NEW KEY
            try:
                if acc:
                    if DEBUG:
                        Log.note("write complete data ({{num}} lines) to {{curr}} in S3", num=len(acc), curr=curr)
                    self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                    last_count_written = 0
                curr = d[UID_PATH]
                acc = [d]
            except Exception, e:
                Log.warning("Can not store data", cause=e)
                Thread.sleep(30 * MINUTE)
def wait_for_logs(self):
    old_length = -1
    elements = self.find("#" + LOG_DIV + " p")
    while len(elements) != old_length:
        Thread.sleep(seconds=10)
        old_length = len(elements)
        elements = self.find("#" + LOG_DIV + " p")

    return [convert.JSON2object(convert.html2unicode(e.get_attribute('innerHTML'))) for e in elements]
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception, e:
        Log.error("Problem with etl", cause=e)
def __init__(self, settings):
    self.logger = logging.Logger("unique name", level=logging.INFO)
    self.logger.addHandler(make_log_from_settings(settings))

    # TURNS OUT LOGGERS ARE REALLY SLOW TOO
    self.queue = threads.Queue("log to classic logger", max=10000, silent=True)
    self.thread = Thread("log to logger", time_delta_pusher, appender=self.logger.info, queue=self.queue, interval=timedelta(seconds=0.3))
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        queries.config.default = {
            "type": "elasticsearch",
            "settings": settings.elasticsearch.copy()
        }

        if settings.args.id:
            work_queue = Queue("local work queue")
            work_queue.extend(parse_id_argument(settings.args.id))
        else:
            work_queue = aws.Queue(settings=settings.work_queue)

        Log.note("Listen to queue {{queue}}, and read off of {{s3}}", queue=settings.work_queue.name, s3=settings.source.bucket)

        es = MultiDayIndex(settings.elasticsearch, queue_size=100000)

        threads = []
        please_stop = Signal()
        for _ in range(settings.threads):
            p = Thread.run("copy to es", copy2es, es, settings, work_queue, please_stop=please_stop)
            threads.append(p)

        def monitor_progress(please_stop):
            while not please_stop:
                Log.note("Remaining: {{num}}", num=len(work_queue))
                Thread.sleep(seconds=10)

        Thread.run(name="monitor progress", target=monitor_progress, please_stop=please_stop)

        aws.capture_termination_signal(please_stop)
        Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
        please_stop.go()
        Log.note("Shutdown started")
    except Exception, e:
        Log.error("Problem with etl", e)
def wait_for_queue(work_queue):
    """
    SLEEP UNTIL WORK QUEUE IS EMPTY ENOUGH FOR MORE
    """
    # return
    while True:
        if len(work_queue) < MAX_QUEUE_SIZE:
            break
        Log.note("sleep for 5min")
        Thread.sleep(seconds=5 * 60)
def test_lock_wait_timeout(self):
    locker = Lock("test")

    def take_lock(please_stop):
        with locker:
            locker.wait(1)
            locker.wait(SECOND)
            locker.wait(till=Date.now() + SECOND)

    Thread.run("take lock", take_lock)
def worker(please_stop):
    while not please_stop:
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            if response.status_code != 400:
                please_stop.go()
                return
        except Exception, e:
            Thread.sleep(seconds=61, please_stop=please_stop)
        Thread.sleep(seconds=11, please_stop=please_stop)
def _rate_limited_get_json(self, *args, **kwargs):
    now = Date.now().unix
    with self.rate_locker:
        if self.request_times[self.request_pointer] >= now - 1:
            Log.note("Rate limiting")
            Thread.sleep(seconds=self.request_times[self.request_pointer] - now + 1)
        self.request_times[self.request_pointer] = now
        self.request_pointer += 1
        self.request_pointer %= len(self.request_times)

    return http.get_json(*args, **kwargs)
def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX
    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {"eq": {"table": join_field(split_field(query["from"])[0:1])}}
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [c for c in m.get_columns(table_name=query["from"], force=True) if c.type not in STRUCT]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}", column=c.name, time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        while end_time > now:
            # GET FRESH VERSIONS
            cols = [c for c in m.get_columns(table_name=query["from"]) if c.type not in STRUCT]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note("wait for column (table={{col.table}}, name={{col.name}}) metadata to arrive", col=c)
                    break
            else:
                break
            Thread.sleep(seconds=1)

        for c in cols:
            Log.note("fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}", column=c)
    except Exception, e:
        Log.warning("could not pickup columns", cause=e)
def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    settings=None
):
    if not alias:
        alias = settings.alias = settings.index
        index = settings.index = proto_name(alias, create_timestamp)

    if settings.alias == index:
        Log.error("Expecting index name to conform to pattern")

    if settings.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = convert.json2value(schema, leaves=True)
    else:
        schema = convert.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.post(
        "/" + index,
        data=schema,
        headers={"Content-Type": "application/json"}
    )

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception, e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Thread.sleep(seconds=1)
def inners():
    for t in data.hits.hits:
        for i in t.inner_hits[literal_field(query_path)].hits.hits:
            t._inner = i._source
            for k, e in post_expressions.items():
                t[k] = e(t)
            yield t
    if more_filter:
        Thread.join(need_more)
        for t in more[0].hits.hits:
            yield t
def worker(please_stop):
    while not please_stop:
        Thread.sleep(1)
        logs = self.queue.pop_all()
        for log in logs:
            if log is Thread.STOP:
                if DEBUG_LOGGING:
                    sys.stdout.write("TextLog_usingThread.worker() sees stop, filling rest of queue\n")
                please_stop.go()
            else:
                self.logger.write(**log)
def wait_for_logs(self):
    old_length = -1
    elements = self.find("#" + LOG_DIV + " p")
    while len(elements) != old_length:
        Thread.sleep(seconds=10)
        old_length = len(elements)
        elements = self.find("#" + LOG_DIV + " p")

    return [CNV.JSON2object(CNV.html2unicode(e.get_attribute('innerHTML'))) for e in elements]
def _pinger(self, please_stop):
    Log.note("pinger started")
    while not please_stop:
        Thread.sleep(till=self.ping_time + PING_PERIOD, please_stop=please_stop)
        if please_stop:
            # EXIT EARLY, OTHERWISE WE MAY OVERWRITE THE shutdown
            break
        if Date.now() < self.ping_time + PING_PERIOD:
            continue
        try:
            self.ping()
        except Exception, e:
            Log.warning("synchro.py could not ping", e)
def _get_and_retry(self, url, **kwargs):
    """
    requests 2.5.0 HTTPS IS A LITTLE UNSTABLE
    """
    kwargs = set_default(kwargs, {"timeout": self.timeout.seconds})
    try:
        return http.get(url, **kwargs)
    except Exception, e:
        try:
            Thread.sleep(seconds=5)
            return http.get(url.replace("https://", "http://"), **kwargs)
        except Exception, f:
            Log.error("Tried {{url}} twice. Both failed.", {"url": url}, cause=[e, f])
def trigger_job(self):
    while not self.please_stop:
        now = Date.now()
        next = now + DAY
        for j in self.jobs:
            if j.next_run_time < now:
                j.next_run_time = next_run(j)
                self.run_job(j)
            next = Date.min(next, j.next_run_time)
        Thread.sleep(till=next, please_stop=self.please_stop)
def worker(please_stop):
    while not please_stop:
        _Thread.sleep(1)
        logs = self.queue.pop_all()
        for log in logs:
            if log is _Thread.STOP:
                if DEBUG_LOGGING:
                    sys.stdout.write(b"TextLog_usingThread.worker() sees stop, filling rest of queue\n")
                please_stop.go()
            else:
                self.logger.write(**log)
def delete(self, filter):
    self.cluster.get_metadata()

    if self.cluster.cluster_state.version.number.startswith("0.90"):
        query = {"filtered": {
            "query": {"match_all": {}},
            "filter": filter
        }}
    elif self.cluster.cluster_state.version.number.startswith("1."):
        query = {"query": {"filtered": {
            "query": {"match_all": {}},
            "filter": filter
        }}}
    else:
        raise NotImplementedError

    if self.debug:
        Log.note("Delete bugs:\n{{query}}", query=query)

    keep_trying = True
    while keep_trying:
        result = self.cluster.delete(self.path + "/_query", data=convert.value2json(query), timeout=60)
        keep_trying = False
        for name, status in result._indices.items():
            if status._shards.failed > 0:
                if status._shards.failures[0].reason.find("rejected execution (queue capacity ") >= 0:
                    keep_trying = True
                    Thread.sleep(seconds=5)
                    break

        if not keep_trying:
            for name, status in result._indices.items():
                if status._shards.failed > 0:
                    Log.error(
                        "ES shard(s) report Failure to delete from {{index}}: {{message}}. Query was {{query}}",
                        index=name,
                        query=query,
                        message=status._shards.failures[0].reason
                    )
def test_and_signals(self):
    acc = []
    locker = Lock()

    def worker(please_stop):
        with locker:
            acc.append("worker")

    a = Thread.run("a", worker)
    b = Thread.run("b", worker)
    c = Thread.run("c", worker)

    (a.stopped & b.stopped & c.stopped).wait()
    acc.append("done")

    self.assertEqual(acc, ["worker", "worker", "worker", "done"])
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        settings=settings
    )
    self.batch_size = batch_size
    self.es.add_alias("debug")
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    Thread.run("add debug logs to es", self._insert_loop)
def __init__(self, name, params, cwd=None, env=None, debug=False):
    self.name = name
    self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
    self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
    self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
    self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

    try:
        self.debug = debug or DEBUG
        self.service = service = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=-1,
            cwd=cwd,
            env=env
        )

        self.stopper = Signal()
        self.stopper.on_go(self._kill)
        self.thread_locker = Lock()
        self.children = [
            Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
            Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
            Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
        ]
    except Exception, e:
        Log.error("Can not call", e)
def startup(self):
    try:
        json = self.synch.read()
        if not json:
            Log.note("{{synchro_key}} does not exist. Starting.", synchro_key=SYNCHRONIZATION_KEY)
            return

        last_run = convert.json2value(json)
        self.next_key = last_run.next_key
        self.source_key = last_run.source_key
        if last_run.action == "shutdown":
            Log.note("{{synchro_key}} shutdown detected. Starting at {{num}}", synchro_key=SYNCHRONIZATION_KEY, num=self.next_key)
        else:
            resume_time = Date(last_run.timestamp) + WAIT_FOR_ACTIVITY
            Log.note("Shutdown not detected, waiting until {{time}} to see if existing pulse_logger is running...", time=resume_time)
            while resume_time > Date.now():
                Thread.sleep(seconds=10)
                json = self.synch.read()
                if json == None:
                    Log.note("{{synchro_key}} disappeared! Starting over.", synchro_key=SYNCHRONIZATION_KEY)
                    self._start()
                    self.pinger_thread = Thread.run("synch pinger", self._pinger)
                    return

                self.next_key = last_run.next_key
                self.source_key = last_run.source_key
                if last_run.action == "shutdown":
                    Log.note("Shutdown detected! Resuming...")
                    self._start()
                    self.pinger_thread = Thread.run("synch pinger", self._pinger)
                    return

                if last_run.timestamp > self.ping_time:
                    Log.error("Another instance of pulse_logger is running!")
                Log.note("No activity, still waiting...")
            Log.note("No activity detected! Resuming...")
    except Exception, e:
        Log.error("Can not start", e)
def note(cls, template, default_params={}, stack_depth=0, **more_params):
    if len(template) > 10000:
        template = template[:10000]

    params = dict(unwrap(default_params), **more_params)

    log_params = Dict(
        template=template,
        params=params,
        timestamp=datetime.utcnow(),
        machine=machine_metadata.name
    )

    if not template.startswith("\n") and template.find("\n") > -1:
        template = "\n" + template

    if cls.trace:
        log_template = "{{machine}} - {{timestamp|datetime}} - {{thread.name}} - {{location.file}}:{{location.line}} ({{location.method}}) - " + template.replace("{{", "{{params.")
        f = sys._getframe(stack_depth + 1)
        log_params.location = {
            "line": f.f_lineno,
            "file": f.f_code.co_filename.split(os.sep)[-1],
            "method": f.f_code.co_name
        }
        thread = Thread.current()
        log_params.thread = {"name": thread.name, "id": thread.id}
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    cls.main_log.write(log_template, log_params)
def _get_columns(self, table=None, metadata=None):
    # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
    if not metadata:
        metadata = self.default_es.get_metadata(force=True)

    def parse_all(please_stop):
        for abs_index, meta in jx.sort(metadata.indices.items(), {"value": 0, "sort": -1}):
            if meta.index != abs_index:
                continue

            for _, properties in meta.mappings.items():
                if please_stop:
                    return
                self._parse_properties(abs_index, properties, meta)

    if table:
        for abs_index, meta in jx.sort(metadata.indices.items(), {"value": 0, "sort": -1}):
            if table == meta.index:
                for _, properties in meta.mappings.items():
                    self._parse_properties(abs_index, properties, meta)
                return
            if table == abs_index:
                self._get_columns(table=meta.index, metadata=metadata)
                return
    else:
        self.parser = Thread.run("parse properties", parse_all)
def __init__(self, stream):
    assert stream
    use_UTF8 = False

    if isinstance(stream, basestring):
        if stream.startswith("sys."):
            use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
        self.stream = eval(stream)
        name = stream
    else:
        self.stream = stream
        name = "stream"

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from pyLibrary.thread.threads import Queue

    if use_UTF8:
        def utf8_appender(value):
            if isinstance(value, unicode):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender
    else:
        appender = self.stream.write

    self.queue = Queue("log to stream", max=10000, silent=True)
    self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
    self.thread.start()
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        settings=settings
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(settings.alias, settings.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))

    Thread.run("add debug logs to es", self._insert_loop)
def note(cls, template, default_params={}, stack_depth=0, **more_params):
    if len(template) > 10000:
        template = template[:10000]

    params = dict(unwrap(default_params), **more_params)

    log_params = Dict(
        template=template,
        params=params,
        timestamp=datetime.utcnow(),
    )

    if not template.startswith("\n") and template.find("\n") > -1:
        template = "\n" + template

    if cls.trace:
        log_template = "{{timestamp|datetime}} - {{thread.name}} - {{location.file}}:{{location.line}} ({{location.method}}) - " + template.replace("{{", "{{params.")
        f = sys._getframe(stack_depth + 1)
        log_params.location = {
            "line": f.f_lineno,
            "file": f.f_code.co_filename.split(os.sep)[-1],
            "method": f.f_code.co_name
        }
        thread = Thread.current()
        log_params.thread = {"name": thread.name, "id": thread.id}
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    cls.main_log.write(log_template, log_params)
class Log_usingThreadedStream(BaseLog):
    # stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream
        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from pyLibrary.thread.threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue("log to stream", max=10000, silent=True)
        self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            raise e  # OH NO!
def _insert_loop(self, please_stop=None):
    bad_count = 0
    while not please_stop:
        try:
            Thread.sleep(seconds=1)
            messages = wrap(self.queue.pop_all())
            if messages:
                # for m in messages:
                #     m.value.params = leafer(m.value.params)
                #     m.value.error = leafer(m.value.error)
                for g, mm in jx.groupby(messages, size=self.batch_size):
                    self.es.extend(mm)
                bad_count = 0
        except Exception, e:
            Log.warning("Problem inserting logs into ES", cause=e)
            bad_count += 1
            if bad_count > 5:
                break
def __init__(
    self,
    exchange,  # name of the Pulse exchange
    topic,  # message name pattern to subscribe to ('#' is wildcard)
    target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
    target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
    host='pulse.mozilla.org',  # url to connect
    port=5671,  # tcp port
    user=None,
    password=None,
    vhost="/",
    start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
    ssl=True,
    applabel=None,
    heartbeat=False,  # True to also get the Pulse heartbeat message
    durable=False,  # True to keep queue after shutdown
    serializer='json',
    broker_timezone='GMT',
    settings=None
):
    global count
    count = coalesce(start, 0)

    self.target_queue = target_queue
    self.pulse_target = target
    if (target_queue == None and target == None) or (target_queue != None and target != None):
        Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

    Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
    self.settings = settings
    settings.callback = self._got_result
    settings.user = coalesce(settings.user, settings.username)
    settings.applabel = coalesce(settings.applable, settings.queue, settings.queue_name)
    settings.topic = topic

    self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
    self.start()
def __init__(self, db=None):
    """
    :param db: Optional, wrap a sqlite db in a thread
    :return: Multithread-safe database
    """
    self.db = None
    self.queue = Queue("sql commands")  # HOLD (command, result, signal) PAIRS
    self.worker = Thread.run("sqlite db thread", self._worker)
    self.get_trace = DEBUG
def add_alias(self, alias=None):
    alias = coalesce(alias, self.settings.alias)
    self.cluster_state = None
    self.cluster.post(
        "/_aliases",
        data={
            "actions": [
                {"add": {"index": self.settings.index, "alias": alias}}
            ]
        },
        timeout=coalesce(self.settings.timeout, 30)
    )

    # WAIT FOR ALIAS TO APPEAR
    while True:
        if alias in self.cluster.get("/_cluster/state").metadata.indices[self.settings.index].aliases:
            return
        Log.note("Waiting for alias {{alias}} to appear", alias=alias)
        Thread.sleep(seconds=1)