def main():
    """Entry point: read settings, then run a pool of extract threads until shutdown."""
    try:
        settings = startup.read_settings()
        # only one instance of this program may run at a time
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)
            extractor = Extract(settings)

            def extract(please_stop):
                # each worker holds its own db connection and transaction
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db, please_stop=please_stop, **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                # re-queue the failed work item for retry
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True, wait_forever=False)
    except Exception as e:
        Log.warning("Problem with data extraction", e)
    finally:
        Log.stop()
def __exit__(self, exc_type, exc_val, exc_tb):
    """On context exit, delete this file asynchronously on a daemon thread."""
    from mo_threads import Thread
    # caller_stack is captured here so the deletion thread can report who triggered it
    Thread.run("delete file " + self.name, delete_daemon, file=self, caller_stack=get_stacktrace(1))
def __init__(self, stream):
    """Wrap a stream (object with write(), or a string to eval()) behind a logging thread."""
    assert stream
    use_UTF8 = False

    if isinstance(stream, basestring):
        if stream.startswith("sys."):
            use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
        # NOTE(review): eval of a configured string; assumed to come from trusted config
        self.stream = eval(stream)
        name = stream
    else:
        self.stream = stream
        name = "stream"

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from mo_threads import Queue

    if use_UTF8:
        def utf8_appender(value):
            # encode unicode before handing to byte-oriented sys.* streams
            if isinstance(value, unicode):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender
    else:
        appender = self.stream.write

    self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
    self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def es_bulksetop(esq, frum, query):
    """Launch a background bulk download for a setop query; return URLs where results will appear."""
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = randoms.base64(32, extra="-_")  # unique id used to name the result files

    schema = frum.schema
    all_paths, split_decoders, var_to_columns = pre_process(query)
    new_select, split_select, flatten = get_selects(query)
    op, split_wheres = setop_to_es_queries(query, all_paths, split_select, var_to_columns)
    es_query = es_query_proto(split_select, op, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        # ES requires some sort for scrolling; _doc is the cheapest
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    # detach the download thread; caller polls the status URL instead of joining
    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = to_data(
        {
            "url": URL_PREFIX / (guid + ".json"),
            "status": URL_PREFIX / (guid + ".status.json"),
            "meta": {"format": query.format, "es_query": es_query, "limit": abs_limit},
        }
    )
    return output
def __init__(self, host, index, port=9200, type="log", queue_size=1000, batch_size=100, kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    # normalize retry/timeout settings before handing them to the cluster
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

    self.es = Cluster(kwargs).get_or_create_index(
        schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        kwargs=kwargs)
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    # background thread drains self.queue into ES
    Thread.run("add debug logs to es", self._insert_loop)
def queue_consumer(pull_queue, please_stop=None):
    """Replay requests from an AWS queue, pacing them to reproduce the original request spacing."""
    queue = aws.Queue(pull_queue)
    time_offset = None  # difference between now and the recorded request clock
    request_count = 0

    while not please_stop:
        request = queue.pop(till=please_stop)
        if please_stop:
            break
        if not request:
            Log.note("Nothing in queue, pausing for 5 seconds...")
            (please_stop | Till(seconds=5)).wait()
            continue
        if SKIP_TRY_REQUESTS and 'try' in request.where['and'].eq.branch:
            Log.note("Skipping try revision.")
            queue.commit()
            continue
        now = Date.now().unix
        if time_offset is None:
            # anchor the replay clock to the first request seen
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        # fire-and-forget; commit marks this message consumed
        Thread.run("request "+text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()
def test_lock_and_till(self):
    """Two threads wait on one lock; both must resume and finish after the main thread notifies."""
    locker = Lock("prime lock")
    got_lock = Signal()
    a_is_ready = Signal("a lock")
    b_is_ready = Signal("b lock")

    def loop(is_ready, please_stop):
        with locker:
            while not got_lock:
                # Log.note("{{thread}} is waiting", thread=Thread.current().name)
                # zero-second till: release the lock momentarily so others can advance
                locker.wait(till=Till(seconds=0))
                is_ready.go()
            # block until the main thread's `with locker` exits and notifies
            locker.wait()
            Log.note("thread is expected to get here")

    thread_a = Thread.run("a", loop, a_is_ready)
    thread_b = Thread.run("b", loop, b_is_ready)

    a_is_ready.wait()
    b_is_ready.wait()
    with locker:
        got_lock.go()
        Till(seconds=0.1).wait()  # MUST WAIT FOR a AND b TO PERFORM locker.wait()
        locker.wait()
        Log.note("leaving")
        pass
    with locker:
        Log.note("leaving again")
        pass
    Till(seconds=1).wait()
    self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
    self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
    """Rate-limited, sqlite-backed cache of hg.mozilla.org responses; starts worker threads."""
    self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
    self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
    self.cache_locker = Lock()
    self.cache = {}  # MAP FROM url TO (ready, headers, response, timestamp) PAIR
    self.no_cache = {}  # VERY SHORT TERM CACHE
    self.workers = []
    self.todo = Queue(APP_NAME+" todo")
    # bounded by how many requests the rate allows over one amortization period
    self.requests = Queue(APP_NAME + " requests", max=int(self.rate * self.amortization_period.seconds))
    self.url = URL(source.url)
    self.db = Sqlite(database)
    self.inbound_rate = RateLogger("Inbound")
    self.outbound_rate = RateLogger("hg.mo")

    # first run: the database is empty, create the cache table
    if not self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data:
        with self.db.transaction() as t:
            t.execute(
                "CREATE TABLE cache ("
                "   path TEXT PRIMARY KEY, "
                "   headers TEXT, "
                "   response TEXT, "
                "   timestamp REAL "
                ")"
            )

    self.threads = [
        Thread.run(APP_NAME+" worker" + text_type(i), self._worker)
        for i in range(CONCURRENCY)
    ]
    self.limiter = Thread.run(APP_NAME+" limiter", self._rate_limiter)
    self.cleaner = Thread.run(APP_NAME+" cleaner", self._cache_cleaner)
def test_queue_speed(self):
    """Adding SCALE items through a ThreadedQueue must complete well under the time budget."""
    SCALE = 1000*10
    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        # drain until sentinel, then signal completion
        while not please_stop:
            item = q.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)
        Log.note("Done insert")
        done.wait()

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
def __init__(self, name):
    """Track request rate under `name` and start the background reporting daemon."""
    # counters start at rest: zero rate, "last request" is construction time
    self.request_rate = 0.0
    self.last_request = Date.now()
    self.name = name
    self.lock = Lock("rate locker")
    # daemon periodically reports/decays the rate
    Thread.run("rate logger", self._daemon)
def __init__(self, stream):
    """Wrap a stream (object with write(), or a string to eval()) behind a logging thread (py3-aware)."""
    assert stream

    if is_text(stream):
        name = stream
        # NOTE(review): eval of a configured string; assumed to come from trusted config
        stream = self.stream = eval(stream)
        if name.startswith("sys.") and PY3:
            # py3 sys streams want text; decode the utf8 bytes produced below
            self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
    else:
        name = "stream"
        self.stream = stream

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from mo_threads import Queue

    def utf8_appender(value):
        if is_text(value):
            value = value.encode('utf8')
        self.stream.write(value)

    appender = utf8_appender

    self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
    self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def queue_consumer(pull_queue, please_stop=None):
    """Replay requests from an AWS queue, pacing them to reproduce the original request spacing."""
    queue = aws.Queue(pull_queue)
    time_offset = None  # difference between now and the recorded request clock
    request_count = 0

    while not please_stop:
        request = queue.pop(till=please_stop)
        if please_stop:
            break
        if not request:
            Log.note("Nothing in queue, pausing for 5 seconds...")
            (please_stop | Till(seconds=5)).wait()
            continue
        if SKIP_TRY_REQUESTS and 'try' in request.where['and'].eq.branch:
            Log.note("Skipping try revision.")
            queue.commit()
            continue
        now = Date.now().unix
        if time_offset is None:
            # anchor the replay clock to the first request seen
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        # fire-and-forget; commit marks this message consumed
        Thread.run("request " + text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """

    def worker(please_stop):
        while not please_stop:
            try:
                response = requests.get(
                    "http://169.254.169.254/latest/meta-data/spot/termination-time"
                )
                # any status other than 400/404 is treated as a termination notice
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    # metadata endpoint unreachable: not a spot node, stop polling
                    Log.note(
                        "AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)"
                    )
                    return
                else:
                    Log.warning("AWS shutdown detection has problems", cause=e)
                    (Till(seconds=61) | please_stop).wait()
            # regular polling cadence
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker)
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """

    def worker(please_stop):
        seen_problem = False  # only warn on consecutive failures
        while not please_stop:
            request_time = (time.time() - timer.START)/60  # MINUTES
            try:
                response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
                seen_problem = False
                # any status other than 400/404 is treated as a termination notice
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    # metadata endpoint unreachable: not a spot node, stop polling
                    Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                    return
                elif seen_problem:
                    # IGNORE THE FIRST PROBLEM
                    Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
                seen_problem = True
                (Till(seconds=61) | please_stop).wait()
            # regular polling cadence
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker)
def __init__(self, host, index, alias=None, name=None, port=9200, kwargs=None):
    """Initialize the metadata singleton: column/table containers plus a refresh worker."""
    global _elasticsearch
    if hasattr(self, "settings"):
        # already initialized (singleton pattern)
        return

    from pyLibrary.queries.containers.list_usingPythonList import ListContainer
    from pyLibrary.env import elasticsearch as _elasticsearch

    self.settings = kwargs
    self.default_name = coalesce(name, alias, index)
    self.default_es = _elasticsearch.Cluster(kwargs=kwargs)
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.es_metadata = Null
    # backdated so the first access forces a metadata refresh
    self.last_es_metadata = Date.now() - OLD_METADATA

    self.meta = Data()
    table_columns = metadata_tables()
    column_columns = metadata_columns()
    self.meta.tables = ListContainer("meta.tables", [], wrap({c.names["."]: c for c in table_columns}))
    self.meta.columns = ColumnList()
    self.meta.columns.insert(column_columns)
    self.meta.columns.insert(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return
def __init__(self, name, config):
    """Spawn a python_worker subprocess and the daemon threads that talk to it."""
    config = to_data(config)
    if config.debug.logs:
        Log.error("not allowed to configure logging on other process")
    Log.note("begin process")
    # WINDOWS REQUIRED shell, WHILE LINUX NOT
    shell = "windows" in platform.system().lower()
    self.process = Process(
        name, [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"], debug=False, cwd=os.getcwd(), shell=shell)
    # send the worker its config (tracing forced on)
    self.process.stdin.add(
        value2json(set_default({}, config, {"debug": {
            "trace": True
        }})))
    # worker acknowledges startup with a fixed JSON line
    status = self.process.stdout.pop()
    if status != '{"out":"ok"}':
        Log.error("could not start python\n{{error|indent}}", error=self.process.stderr.pop_all() + [status] + self.process.stdin.pop_all())
    self.lock = Lock("wait for response from " + name)
    self.current_task = DONE
    self.current_response = None
    self.current_error = None
    self.daemon = Thread.run("", self._daemon)
    self.errors = Thread.run("", self._stderr)
def __init__(self, logger):
    """Queue log writes and drain them to the wrapped logger on a worker thread."""
    if not isinstance(logger, StructuredLogger):
        Log.error("Expecting a StructuredLogger")

    self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
    self.logger = logger

    def worker(logger, please_stop):
        try:
            while not please_stop:
                # batch up to one second of messages per drain
                Till(seconds=1).wait()
                logs = self.queue.pop_all()
                for log in logs:
                    if log is THREAD_STOP:
                        please_stop.go()
                    else:
                        logger.write(**log)
        finally:
            logger.stop()

    self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
    self.thread.parent.remove_child(
        self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
class StructuredLogger_usingThread(StructuredLogger):
    """Forward log writes through a queue drained by a module-level `worker` on its own thread."""

    def __init__(self, logger, period=PERIOD):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.logger = logger
        self.queue = Queue(
            "Queue for " + self.__class__.__name__,
            max=10000,
            silent=True,
            allow_add_after_close=True,
        )
        # `worker` is defined elsewhere in this module; it drains self.queue every `period`
        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger, self.queue, period)
        # worker WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.parent.remove_child(self.thread)
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))
def _get_queue(self, row):
    """Return the threaded queue for the rollover index that covers this row's timestamp,
    creating the index (and its queue) on first use. Returns Null for rows with no
    timestamp and DATA_TOO_OLD for rows past the rollover horizon."""
    row = wrap(row)
    if row.json:
        row.value, row.json = json2value(row.json), None
    timestamp = Date(self.rollover_field(row.value))
    if timestamp == None:
        return Null
    elif timestamp < Date.today() - self.rollover_max:
        return DATA_TOO_OLD

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    with self.locker:
        queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        # find existing indexes matching "<prefix>YYYYMMDD_HHMMSS"
        candidates = sort_using_key(
            filter(
                lambda r: re.match(
                    re.escape(self.settings.index) + r"\d\d\d\d\d\d\d\d_\d\d\d\d\d\d$",
                    r['index']
                ),
                self.cluster.get_aliases()
            ),
            key=lambda r: r['index']
        )
        best = None
        for c in candidates:
            c = wrap(c)
            # index date is embedded in the last 15 chars of the index name
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c  # latest candidate older than the row's timestamp
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                    es.add_alias(self.settings.index)
                except Exception as e:
                    e = Except.wrap(e)
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

        def refresh(please_stop):
            # best-effort: slow the refresh interval on the new index
            try:
                es.set_refresh_interval(seconds=60 * 10, timeout=5)
            except Exception:
                Log.note("Could not set refresh interval for {{index}}", index=es.settings.index)

        Thread.run("refresh", refresh)

        self._delete_old_indexes(candidates)
        threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
        with self.locker:
            queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
    return queue
def main():
    """Read config, start logging, then block until a shutdown signal arrives."""
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        # FIX: was `except Exception, e:` — Python-2-only syntax; the rest of
        # this codebase uses the `as` form, which works on both 2 and 3
        Log.error("Problem with etl", cause=e)
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJCET WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE

    def __init__(self, stream):
        """Wrap a stream (object with write(), or a string to eval()) behind a logging thread."""
        assert stream
        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            # NOTE(review): eval of a configured string; assumed to come from trusted config
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                # encode unicode before handing to byte-oriented sys.* streams
                if isinstance(value, unicode):
                    value = value.encode('utf8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            # FIX: was `except Exception, f:` — Python-2-only syntax, inconsistent
            # with the `as e` form used two lines above in this same method
            if DEBUG_LOGGING:
                raise f
class StructuredLogger_usingThread(StructuredLogger):
    """Forward log writes through a bounded queue drained by a private worker thread."""

    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        # nothing queued: sleep up to a second, or until stop
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
        self.thread.parent.remove_child(
            self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))
        with suppress_exception:
            self.queue.close()
def inners():
    """Yield outer hits with their inner hits spliced in; then any deferred "more" results.
    NOTE(review): relies on closure variables (data, query_path, post_expressions,
    more_filter, need_more, more) defined in the enclosing scope, not visible here."""
    for t in data.hits.hits:
        for i in t.inner_hits[literal_field(query_path)].hits.hits:
            t._inner = i._source
            for k, e in post_expressions.items():
                t[k] = e(t)
            yield t
    if more_filter:
        # wait for the secondary query thread before yielding its results
        Thread.join(need_more)
        for t in more[0].hits.hits:
            yield t
def __exit__(self, exc_type, exc_val, exc_tb):
    """Shut down the pulse consumer: stop signal, close queue, disconnect, then thread exit."""
    Log.note("clean pulse exit")
    self.please_stop.go()
    with suppress_exception:
        self.target_queue.close()
        Log.note("stop put into queue")

    try:
        self.pulse.disconnect()
    except Exception as e:
        # disconnect failure is non-fatal during teardown
        Log.warning("Can not disconnect during pulse exit, ignoring", e)
    Thread.__exit__(self, exc_type, exc_val, exc_tb)
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJCET WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE

    def __init__(self, stream):
        """Wrap a stream behind a logging thread; py3 sys.* streams get a decoding shim."""
        assert stream

        if is_text(stream):
            name = stream
            # NOTE(review): eval of a configured string; assumed to come from trusted config
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                # py3 sys streams want text; decode the utf8 bytes produced below
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if is_text(value):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            if DEBUG_LOGGING:
                raise f
def __init__(self):
    """Initialize all counters/locks, then launch the four monitoring daemons."""
    self.out_of_memory_restart = False

    # totals
    self.total_locker = Lock()
    self.total_files_requested = 0
    self.total_tuids_mapped = 0

    # thread accounting
    self.threads_locker = Lock()
    self.waiting = 0
    self.threads_waiting = 0

    # request accounting
    self.requests_locker = Lock()
    self.requests_total = 0
    self.requests_complete = 0
    self.requests_incomplete = 0
    self.requests_passed = 0
    self.requests_failed = 0

    # memory tracking
    self.prev_mem = 0
    self.curr_mem = 0
    self.initial_growth = {}

    # launch daemons in the same order as before
    for label, target in [
        ("pc-daemon", self.run_pc_daemon),
        ("threads-daemon", self.run_threads_daemon),
        ("memory-daemon", self.run_memory_daemon),
        ("requests-daemon", self.run_requests_daemon),
    ]:
        Thread.run(label, target)
def test_and_signals(self):
    """The AND-combined stopped signal must fire only after all three workers finish."""
    acc = []
    serializer = Lock()

    def worker(please_stop):
        # lock serializes list appends across the three workers
        with serializer:
            acc.append("worker")

    threads = [Thread.run(label, worker) for label in ("a", "b", "c")]

    combined = threads[0].stopped & threads[1].stopped & threads[2].stopped
    combined.wait()
    acc.append("done")

    self.assertEqual(acc, ["worker", "worker", "worker", "done"])
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJCET WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE

    def __init__(self, stream):
        """Wrap a stream behind a logging thread; py3 sys.* streams get a decoding shim."""
        assert stream

        if isinstance(stream, text_type):
            name = stream
            # NOTE(review): eval of a configured string; assumed to come from trusted config
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                # py3 sys streams want text; decode the utf8 bytes produced below
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if isinstance(value, text_type):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            if DEBUG_LOGGING:
                raise f
def __init__(self, host, index, sql_file='metadata.sqlite', alias=None, name=None, port=9200, kwargs=None):
    """Initialize the metadata singleton: column list, alias maps, and a refresh worker."""
    if hasattr(self, "settings"):
        # already initialized (singleton pattern)
        return
    self.too_old = TOO_OLD
    self.settings = kwargs
    self.default_name = coalesce(name, alias, index)
    self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)
    self.index_does_not_exist = set()
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.index_to_alias = Relation_usingList()

    self.es_metadata = Null
    # backdated so the first access forces a metadata refresh
    self.metadata_last_updated = Date.now() - OLD_METADATA

    self.meta = Data()
    self.meta.columns = ColumnList()

    self.alias_to_query_paths = {
        "meta.columns": [['.']],
        "meta.tables": [['.']]
    }
    self.alias_last_updated = {
        "meta.columns": Date.now(),
        "meta.tables": Date.now()
    }
    table_columns = metadata_tables()
    self.meta.tables = ListContainer(
        "meta.tables",
        [
            # TableDesc("meta.columns", None, ".", Date.now()),
            # TableDesc("meta.tables", None, ".", Date.now())
        ],
        jx_base.Schema(".", table_columns))
    self.meta.columns.extend(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return
def __init__(self, conn=None, tuid_service=None, kwargs=None):
    """Set up the changeset log: db, queues, minimum backfill, then the four worker threads."""
    try:
        self.config = kwargs
        self.conn = conn if conn else sql.Sql(self.config.database.name)
        self.hg_cache = HgMozillaOrg(
            kwargs=self.config.hg_cache,
            use_cache=True) if self.config.hg_cache else Null
        self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
            database=None, hg=None, kwargs=self.config, conn=self.conn, clogger=self)
        self.rev_locker = Lock()
        self.working_locker = Lock()

        self.init_db()
        self.next_revnum = coalesce(
            self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
        self.csets_todo_backwards = Queue(
            name="Clogger.csets_todo_backwards")
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
        # NOTE: self.config is narrowed to the tuid sub-config from here on
        self.config = self.config.tuid
        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        # Make sure we are filled before allowing queries
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        if numrevs < MINIMUM_PERMANENT_CSETS:
            Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
            oldest_rev = 'tip'
            with self.conn.transaction() as t:
                tmp = t.query(
                    "SELECT min(revnum), revision FROM csetLog").data[0][1]
                if tmp:
                    oldest_rev = tmp
            self._fill_in_range(MINIMUM_PERMANENT_CSETS - numrevs, oldest_rev, timestamp=False)

        Log.note(
            "Table is filled with atleast {{minim}} entries. Starting workers...",
            minim=MINIMUM_PERMANENT_CSETS)

        Thread.run('clogger-tip', self.fill_forward_continuous)
        Thread.run('clogger-backfill', self.fill_backward_with_list)
        Thread.run('clogger-maintenance', self.csetLog_maintenance)
        Thread.run('clogger-deleter', self.csetLog_deleter)

        Log.note("Started clogger workers.")
    except Exception as e:
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
class StructuredLogger_usingThread(StructuredLogger):
    """Forward log writes through a bounded queue drained by a private worker thread."""

    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        # nothing queued: sleep up to a second, or until stop
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))
        with suppress_exception:
            self.queue.close()
def __init__(self, name, config):
    """Spawn a python_worker subprocess and the daemon threads that talk to it."""
    config = wrap(config)
    if config.debug.logs:
        Log.error("not allowed to configure logging on other process")

    self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
    # send the worker its config, with tracing defaults applied
    self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

    self.lock = Lock("wait for response from "+name)
    self.current_task = None
    self.current_response = None
    self.current_error = None

    self.daemon = Thread.run("", self._daemon)
    self.errors = Thread.run("", self._stderr)
def __init__(self, name):
    """Sqlite-backed column table: load persisted columns, then start the write-behind worker."""
    Table.__init__(self, "meta.columns")
    self.db_file = File("metadata." + name + ".sqlite")
    self.data = {}  # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
    self.locker = Lock()
    self._schema = None
    # autocommit (isolation_level=None); shared across threads, guarded by self.locker
    self.db = sqlite3.connect(
        database=self.db_file.abspath, check_same_thread=False, isolation_level=None
    )
    self.last_load = Null
    self.todo = Queue(
        "update columns to db"
    )  # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
    self._db_load()
    Thread.run("update " + name, self._db_worker)
def test_till_in_loop(self):
    """A Till created each loop iteration must not pile up jobs on the stop signal."""
    def loop(please_stop):
        iterations = 0
        while not please_stop:
            (Till(seconds=0.001) | please_stop).wait()
            iterations += 1
            Log.note("{{count}}", count=iterations)

    please_stop = Signal("please_stop")
    Thread.run("loop", loop, please_stop=please_stop)

    Till(seconds=1).wait()
    with please_stop.lock:
        pending = len(please_stop.job_queue)
        self.assertLessEqual(pending, 1, "Expecting only one pending job on go")
    please_stop.go()
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    # normalize retry/timeout settings before handing them to the cluster
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
    # pick one host at random from the configured list
    kwargs.host = Random.sample(listwrap(host), 1)[0]

    schema = json2value(value2json(SCHEMA), leaves=True)
    schema.mappings[type].properties["~N~"].type = "nested"
    self.es = Cluster(kwargs).get_or_create_index(
        schema=schema,
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    # background thread drains self.queue into ES
    self.worker = Thread.run("add debug logs to es", self._insert_loop)
def __init__(self, logger):
    """Queue log writes and drain them to the wrapped logger on a worker thread."""
    if not isinstance(logger, StructuredLogger):
        Log.error("Expecting a StructuredLogger")

    self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
    self.logger = logger

    def worker(logger, please_stop):
        try:
            while not please_stop:
                logs = self.queue.pop_all()
                if not logs:
                    # nothing queued: sleep up to a second, or until stop
                    (Till(seconds=1) | please_stop).wait()
                    continue
                for log in logs:
                    if log is THREAD_STOP:
                        please_stop.go()
                    else:
                        logger.write(**log)
        except Exception as e:
            print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
        finally:
            Log.note("stop the child")
            logger.stop()

    self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(kwargs).get_or_create_index(
        schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        kwargs=kwargs
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    # NOTE(review): unlike sibling variants, this keeps a Duration (no .seconds) — confirm intended
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))

    # background thread drains self.queue into ES
    Thread.run("add debug logs to es", self._insert_loop)
def __init__(self, stream):
    """Wrap a stream behind a logging thread; py3 sys.* streams get a decoding shim."""
    assert stream

    if isinstance(stream, text_type):
        name = stream
        # NOTE(review): eval of a configured string; assumed to come from trusted config
        stream = self.stream = eval(stream)
        if name.startswith("sys.") and PY3:
            # py3 sys streams want text; decode the utf8 bytes produced below
            self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
    else:
        name = "stream"
        self.stream = stream

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from mo_threads import Queue

    def utf8_appender(value):
        if isinstance(value, text_type):
            value = value.encode('utf8')
        self.stream.write(value)

    appender = utf8_appender

    self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
    self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def _find_revision(self, revision):
    """
    Search a few well-known branches for the given revision, using three
    parallel worker threads; return the list of revisions found.
    """
    please_stop = False
    locker = Lock()
    output = []
    queue = Queue("branches", max=2000)
    # only the default-locale variants of these branches are searched
    queue.extend(b for b in self.branches if b.locale == DEFAULT_LOCALE and b.name in ["try", "mozilla-inbound", "autoland"])
    queue.add(THREAD_STOP)
    # NOTE(review): failures are collected here but never reported to the
    # caller -- a revision that exists may be silently missed
    problems = []

    def _find(please_stop):
        for b in queue:
            if please_stop:
                return
            try:
                url = b.url + "json-info?node=" + revision
                rev = self.get_revision(Revision(branch=b, changeset={"id": revision}))
                with locker:
                    output.append(rev)
                Log.note("Revision found at {{url}}", url=url)
            except Exception as f:
                problems.append(f)

    threads = []
    for i in range(3):
        threads.append(Thread.run("find changeset " + text_type(i), _find, please_stop=please_stop))

    for t in threads:
        with assert_no_exception:
            t.join()

    return output
def setup(
    self,
    instance,   # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
    utility     # THE utility OBJECT FOUND IN CONFIG
):
    """
    Configure a freshly-launched instance (packages, etl code, grcov,
    config file, supervisor), bounded by settings.setup_timeout; raise
    if the worker does not finish in time.
    """
    # BUG FIX: was py2-only unicode(); use text_type as the rest of the
    # codebase does, so this also runs on Python 3
    from mo_future import text_type

    with self.locker:
        if not self.settings.setup_timeout:
            Log.error("expecting instance.setup_timeout to prevent setup from locking")

        def worker(please_stop):
            cpu_count = int(round(utility.cpu))

            with hide('output'):
                Log.note("setup {{instance}}", instance=instance.id)
                self._config_fabric(instance)
                Log.note("update packages on {{instance}} ip={{ip}}", instance=instance.id, ip=instance.ip_address)
                try:
                    self._update_ubuntu_packages()
                except Exception as e:
                    # package update failure aborts setup for this instance
                    Log.warning("Can not setup {{instance}}, type={{type}}", instance=instance.id, type=instance.instance_type, cause=e)
                    return
                Log.note("setup etl on {{instance}}", instance=instance.id)
                self._setup_etl_code()
                Log.note("setup grcov on {{instance}}", instance=instance.id)
                self._setup_grcov()
                Log.note("add config file on {{instance}}", instance=instance.id)
                self._add_private_file()
                Log.note("setup supervisor on {{instance}}", instance=instance.id)
                self._setup_etl_supervisor(cpu_count)
                Log.note("setup done {{instance}}", instance=instance.id)

        worker_thread = Thread.run("etl setup started at " + text_type(Date.now().format()), worker)
        # wait for either completion or timeout, whichever comes first
        (Till(timeout=Duration(self.settings.setup_timeout).seconds) | worker_thread.stopped).wait()
        if not worker_thread.stopped:
            Log.error("critical failure in thread {{name|quote}}", name=worker_thread.name)
        worker_thread.join()
def __init__(self, host, index, sql_file='metadata.sqlite', alias=None, name=None, port=9200, kwargs=None):
    """
    Metadata manager over an Elasticsearch cluster; tracks column/table
    metadata and (optionally) refreshes it with a background thread.
    """
    # singleton-style guard: if already initialized, do nothing
    if hasattr(self, "settings"):
        return

    self.too_old = TOO_OLD
    self.settings = kwargs
    self.default_name = coalesce(name, alias, index)
    self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)

    self.index_does_not_exist = set()
    # work queue of metadata-refresh requests; unique=True de-duplicates
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.index_to_alias = Relation_usingList()

    self.es_metadata = Null
    # set to a stale time (now - OLD_METADATA), presumably to force a
    # refresh on first use -- TODO confirm
    self.metadata_last_updated = Date.now() - OLD_METADATA

    self.meta = Data()
    self.meta.columns = ColumnList()

    # query paths for the two built-in meta tables
    self.alias_to_query_paths = {
        "meta.columns": [['.']],
        "meta.tables": [['.']]
    }
    self.alias_last_updated = {
        "meta.columns": Date.now(),
        "meta.tables": Date.now()
    }
    table_columns = metadata_tables()
    self.meta.tables = ListContainer(
        "meta.tables",
        [
            # TableDesc("meta.columns", None, ".", Date.now()),
            # TableDesc("meta.tables", None, ".", Date.now())
        ],
        jx_base.Schema(".", table_columns)
    )
    self.meta.columns.extend(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return
def __init__(
    self,
    hg=None,           # CONNECT TO hg
    repo=None,         # CONNECTION INFO FOR ES CACHE
    branches=None,     # CONNECTION INFO FOR ES CACHE
    use_cache=False,   # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None
):
    """
    Access to hg.mozilla.org, with an optional Elasticsearch cache of
    revisions; starts a daemon thread to service self.todo.
    """
    if not _hg_branches:
        _late_imports()

    self.es_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)

    self.settings = kwargs
    # BUG FIX: self.timeout was previously re-assigned the raw `timeout`
    # near the end of this constructor, clobbering this Duration; keep the
    # normalized Duration only
    self.timeout = Duration(timeout)

    # VERIFY CONNECTIVITY (response ignored; failure is explained/raised)
    with Explanation("Test connect with hg"):
        http.head(self.settings.hg.url)

    if branches == None:
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.es = None
        return

    self.last_cache_miss = Date.now()

    set_default(repo, {"schema": revision_schema})
    self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

    def setup_es(please_stop):
        # best-effort index tuning; failures are intentionally ignored
        with suppress_exception:
            self.es.add_alias()
        with suppress_exception:
            self.es.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    Thread.run("hg daemon", self._daemon)
def _setup():
    """
    Build the shared test database plus two worker threads ("a" and "b"),
    each gated by four begin/done Signal pairs.
    """
    db = Sqlite()
    db.query("CREATE TABLE my_table (value TEXT)")

    threads = Data()
    signals = Data()
    for worker_name in ("a", "b"):
        gates = []
        for _ in range(4):
            gates.append({"begin": Signal(), "done": Signal()})
        signals[worker_name] = gates
        threads[worker_name] = Thread.run(worker_name, _work, worker_name, db, gates)

    return db, threads, signals
def __init__(self, filename=None, db=None, upgrade=True):
    """
    :param db: Optional, wrap a sqlite db in a thread
    :return: Multithread-safe database
    """
    # one-time, process-wide sqlite upgrade (when requested)
    if upgrade:
        if not _upgraded:
            _upgrade()

    self.filename = filename
    self.db = db
    self.upgrade = upgrade
    self.get_trace = DEBUG
    # queue OF (command, result, signal) TUPLES FOR THE WORKER THREAD
    self.queue = Queue("sql commands")
    self.worker = Thread.run("sqlite db thread", self._worker)
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """

    def worker(please_stop):
        while not please_stop:
            try:
                # EC2 instance-metadata endpoint; spot/termination-time is
                # populated when a spot interruption is scheduled
                response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
                if response.status_code not in [400, 404]:
                    # NOTE(review): any status other than 400/404 (including
                    # unexpected errors) is treated as pending termination
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    # metadata endpoint unreachable: probably not an AWS spot
                    # node, so stop polling entirely
                    Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                    return
                else:
                    Log.warning("AWS shutdown detection has problems", cause=e)
                # back off longer after an unexpected error
                (Till(seconds=61) | please_stop).wait()
            # normal polling cadence
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker, please_stop=please_stop)
def __init__(
    self,
    exchange,            # name of the Pulse exchange
    topic,               # message name pattern to subscribe to ('#' is wildcard)
    target=None,         # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
    target_queue=None,   # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
    host='pulse.mozilla.org',  # url to connect,
    port=5671,           # tcp port
    user=None,
    password=None,
    vhost="/",
    start=0,             # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
    ssl=True,
    applabel=None,
    heartbeat=False,     # True to also get the Pulse heartbeat message
    durable=False,       # True to keep queue after shutdown
    serializer='json',
    broker_timezone='GMT',
    kwargs=None
):
    """
    A Pulse consumer thread: deliver payloads either to a target callback
    (slow digesters) or to a target_queue (fast digesters), but not both.
    """
    global count
    count = coalesce(start, 0)

    self.target_queue = target_queue
    self.pulse_target = target
    # exactly one of target/target_queue must be given
    if (target_queue == None and target == None) or (target_queue != None and target != None):
        Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

    Thread.__init__(self, name="Pulse consumer for " + kwargs.exchange, target=self._worker)
    self.settings = kwargs
    kwargs.callback = self._got_result
    kwargs.user = coalesce(kwargs.user, kwargs.username)
    # BUG FIX: was kwargs.applable (typo), which is always null on a Data,
    # so an explicitly configured applabel was silently ignored
    kwargs.applabel = coalesce(kwargs.applabel, kwargs.queue, kwargs.queue_name)
    kwargs.topic = topic

    self.pulse = ModifiedGenericConsumer(kwargs, connect=True, **kwargs)
    self.start()
def note(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if not isinstance(template, text_type):
        Log.error("Log.note was expecting a unicode template")

    # cap template size so one bad caller can not flood the log
    if len(template) > 10000:
        template = template[:10000]

    params = dict(unwrap(default_params), **more_params)

    log_params = set_default({
        "template": template,
        "params": params,
        "timestamp": datetime.utcnow(),
        "machine": machine_metadata
    }, log_context, {"context": exceptions.NOTE})

    # multi-line messages start on their own line
    if not template.startswith("\n") and template.find("\n") > -1:
        template = "\n" + template

    if cls.trace:
        # prefix with machine/thread/caller location; "{{" is rewritten to
        # "{{params." so placeholders resolve against log_params.params
        log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
        # +1 skips this note() frame itself
        f = sys._getframe(stack_depth + 1)
        log_params.location = {
            "line": f.f_lineno,
            "file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
            "method": text_type(f.f_code.co_name)
        }
        thread = _Thread.current()
        log_params.thread = {"name": thread.name, "id": thread.id}
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    cls.main_log.write(log_template, log_params)
def query(self, command):
    """
    WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
    :param command: COMMAND FOR SQLITE
    :return: list OF RESULTS
    """
    # lazily (re)start the worker thread if it is not running
    if not self.worker:
        self.worker = Thread.run("sqlite db thread", self._worker)

    done = Signal()
    result = Data()
    # hand the command to the worker, then wait for it to signal completion
    self.queue.add((command, result, done, None))
    done.wait()

    cause = result.exception
    if cause:
        Log.error("Problem with Sqlite call", cause=cause)
    return result
def one_request(request, please_stop):
    """
    Replay a single TUID request against the local service; log when the
    response does not cover every requested file.
    """
    # recover the list of files the request asked about from its "and" clause
    expected_files = []
    for clause in request.where['and']:
        in_path = clause['in'].path
        if in_path:
            expected_files = in_path
        elif clause.eq.path:
            expected_files = [clause.eq.path]

    with Timer("Make TUID request from {{timestamp|datetime}}", {"timestamp": request.meta.request_time}):
        try:
            result = http.post_json(
                "http://localhost:5000/tuid",
                json=request,
                timeout=30
            )
            if result is None or len(result.data) != len(expected_files):
                Log.note("incomplete response for {{thread}}", thread=Thread.current().name)
        except Exception as e:
            Log.warning("Request failure", cause=e)
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - NUMBER OF SECONDS BETWEEN BATCHES (code does time() + interval)
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """
    next_run = time() + interval

    while not please_stop:
        # pause profiling while we sleep
        profiler = Thread.current().cprofiler
        profiler.disable()
        (Till(till=next_run) | please_stop).wait()
        profiler.enable()

        next_run = time() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        lines = []
        for log in logs:
            try:
                if log is THREAD_STOP:
                    please_stop.go()
                    # flush remaining lines immediately on the next pass
                    next_run = time()
                else:
                    expanded = expand_template(log.get("template"), log.get("params"))
                    lines.append(expanded)
            except Exception as e:
                location = log.get('params', {}).get('location', {})
                Log.warning("Trouble formatting log from {{location}}", location=location, cause=e)
                # SWALLOW ERROR, GOT TO KEEP RUNNING
        try:
            appender(u"\n".join(lines) + u"\n")
        except Exception as e:
            # last resort: the appender itself failed; report on stderr
            sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n"))
def _annotate(
    cls,
    item,
    timestamp,
    stack_depth
):
    """
    ADD TIMESTAMP, MACHINE AND (WHEN TRACING) THREAD/CALLER LOCATION TO
    item, THEN WRITE IT TO THE MAIN LOG

    :param item: THE LOG ITEM TO ANNOTATE AND WRITE
    :param timestamp: THE TIME TO STAMP ON THE ITEM
    :param stack_depth: FOR TRACKING WHAT LINE THIS CAME FROM
    :return:
    """
    item.timestamp = timestamp
    item.machine = machine_metadata
    # cap sizes so one bad caller can not flood the log
    item.template = strings.limit(item.template, 10000)
    item.format = strings.limit(item.format, 10000)

    if item.format == None:
        format = text_type(item)
    else:
        # "{{" is rewritten to "{{params." so placeholders resolve against
        # the item's params
        format = item.format.replace("{{", "{{params.")

    # multi-line messages start on their own line
    if not format.startswith(CR) and format.find(CR) > -1:
        format = CR + format

    if cls.trace:
        # prefix with machine/thread/caller location
        log_format = item.format = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" - ({{location.method}}) - " + format
        # +1 skips this _annotate() frame itself
        f = sys._getframe(stack_depth + 1)
        item.location = {
            "line": f.f_lineno,
            "file": text_type(f.f_code.co_filename),
            "method": text_type(f.f_code.co_name)
        }
        thread = _Thread.current()
        item.thread = {"name": thread.name, "id": thread.id}
    else:
        log_format = item.format = "{{timestamp|datetime}} - " + format

    cls.main_log.write(log_format, item.__data__())
Log.note("Skipping try revision.") queue.commit() continue now = Date.now().unix if time_offset is None: time_offset = now - request.meta.request_time next_request = request.meta.request_time + time_offset if next_request > now: Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now)) Till(till=next_request).wait() Thread.run("request "+text_type(request_count), one_request, request) request_count += 1 queue.commit() if __name__ == '__main__': try: tmp_signal = Signal() config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) queue_consumer(kwargs=config, please_stop=tmp_signal) worker = Thread.run("sqs consumer", queue_consumer, kwargs=config) MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped) except BaseException as e: Log.error("Serious problem with consumer construction! Shutdown!", cause=e)