def worker(please_stop):
    # Poll the AWS instance-metadata endpoint for a spot-termination notice.
    # Runs until `please_stop` is signalled, or until the endpoint proves
    # unreachable (in which case this is probably not a spot node and we quit).
    seen_problem = False  # True iff the PREVIOUS poll raised; the first failure is ignored
    while not please_stop:
        request_time = (time.time() - timer.START) / 60  # MINUTES
        try:
            response = requests.get(
                "http://169.254.169.254/latest/meta-data/spot/termination-time"
            )
            seen_problem = False
            # 400/404 mean "no termination scheduled"; any other status is
            # treated as a termination notice for this node
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}",
                          name=machine_metadata.name,
                          type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            # NOTE(review): `in e` relies on Except supporting substring
            # containment over the error text — confirm against mo-logs
            if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                # metadata endpoint unreachable: not a spot node, stop polling
                Log.note(
                    "AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)"
                )
                return
            elif seen_problem:
                # IGNORE THE FIRST PROBLEM
                Log.warning(
                    "AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)",
                    time=request_time,
                    cause=e)
            seen_problem = True
            # back off longer after a failed poll
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def _wait_for_queue_space(self, timeout=None):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: IN SECONDS; DEFAULTS TO DEFAULT_WAIT_TIME
    RAISES (VIA Log.error) WITH THREAD_TIMEOUT WHEN THE DEADLINE PASSES
    """
    wait_time = 5  # SECONDS BETWEEN "queue is full" ALERTS (non-silent mode)

    # FIX: template references {{name}} but no `name` argument was supplied
    (DEBUG and len(self.queue) > 1 * 1000 * 1000
     ) and Log.warning("Queue {{name}} has over a million items", name=self.name)

    start = time()
    stop_waiting = Till(till=start + coalesce(timeout, DEFAULT_WAIT_TIME))

    while not self.closed and len(self.queue) >= self.max:
        if stop_waiting:
            # deadline Till has expired; Log.error raises
            Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(stop_waiting)
        else:
            self.lock.wait(Till(seconds=wait_time))
            if not stop_waiting and len(self.queue) >= self.max:
                # still full after a short wait: complain with elapsed time
                now = time()
                Log.alert(
                    "Queue with name {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                    name=self.name,
                    num=len(self.queue),
                    wait_time=now - start)
def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: IN SECONDS
    RAISES (VIA Log.error) WITH THREAD_TIMEOUT WHEN THE DEADLINE PASSES
    """
    wait_time = 5  # SECONDS BETWEEN "queue is full" ALERTS

    # FIX: template references {{name}} but no `name` argument was supplied
    (DEBUG and len(self.queue) > 1 * 1000 * 1000
     ) and Log.warning("Queue {{name}} has over a million items", name=self.name)

    now = time()
    # `!= None` kept deliberately: project Null compares equal to None
    if timeout != None:
        time_to_stop_waiting = now + timeout
    else:
        time_to_stop_waiting = now + DEFAULT_WAIT_TIME

    if self.next_warning < now:
        self.next_warning = now + wait_time

    while not self.closed and len(self.queue) >= self.max:
        # FIX: refresh `now` each iteration; previously it was only updated in
        # the non-silent branch, so a silent waiter could never observe the
        # deadline and busy-looped on an already-expired Till
        now = time()
        if now > time_to_stop_waiting:
            Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(Till(till=time_to_stop_waiting))
        else:
            self.lock.wait(Till(seconds=wait_time))
            if len(self.queue) >= self.max:
                now = time()
                if self.next_warning < now:
                    self.next_warning = now + wait_time
                    # NOTE(review): wait_time here is the constant poll
                    # interval, not total elapsed time — confirm intent
                    Log.alert(
                        "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                        name=self.name,
                        num=len(self.queue),
                        wait_time=wait_time)
def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: IN SECONDS; None MEANS WAIT FOREVER (Null deadline)
    RAISES (VIA _Log.error) WITH THREAD_TIMEOUT WHEN THE DEADLINE PASSES
    """
    wait_time = 5  # SECONDS BETWEEN "queue is full" ALERTS

    now = time()
    # `!= None` kept deliberately: project Null compares equal to None
    if timeout != None:
        time_to_stop_waiting = now + timeout
    else:
        time_to_stop_waiting = Null  # comparisons with Null are falsy: no deadline

    if self.next_warning < now:
        self.next_warning = now + wait_time

    while not self.please_stop and len(self.queue) >= self.max:
        # FIX: refresh `now` each iteration; previously it was only updated in
        # the non-silent branch, so a silent waiter never saw the deadline pass
        now = time()
        if now > time_to_stop_waiting:
            if not _Log:
                _late_import()
            _Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(Till(till=time_to_stop_waiting))
        else:
            self.lock.wait(Till(timeout=wait_time))
            # FIX: was `>`; loop blocks on `>=`, so a queue holding exactly
            # self.max items never triggered the alert
            if len(self.queue) >= self.max:
                now = time()
                if self.next_warning < now:
                    self.next_warning = now + wait_time
                    # FIX: guard the late import here too; previously _Log
                    # could still be None on this path
                    if not _Log:
                        _late_import()
                    _Log.alert(
                        "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                        name=self.name,
                        num=len(self.queue),
                        wait_time=wait_time)
def worker_bee(please_stop):
    # Drain this queue into the slower `queue`, batching items in _buffer.
    # Plain callables popped from the queue are deferred and run after the
    # next push; THREAD_STOP flushes and terminates.
    def stopper():
        self.add(THREAD_STOP)
    please_stop.on_go(stopper)

    _buffer = []
    _post_push_functions = []
    now = time()
    next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
    last_push = now - period

    def push_to_queue():
        # flush the batch, then run (and clear) the deferred callbacks
        queue.extend(_buffer)
        del _buffer[:]
        for f in _post_push_functions:
            f()
        del _post_push_functions[:]

    # NOTE(review): unlike the sibling implementations, this loop has no
    # periodic batch-flush section after the except; _buffer is only pushed
    # on THREAD_STOP — confirm whether that is intentional
    while not please_stop:
        try:
            if not _buffer:
                item = self.pop()
                now = time()
                if now > last_push + period:
                    # _Log.note("delay next push")
                    next_push = Till(till=now + period)
            else:
                item = self.pop(till=next_push)
                now = time()

            if item is THREAD_STOP:
                push_to_queue()
                please_stop.go()
                break
            elif isinstance(item, types.FunctionType):
                _post_push_functions.append(item)
            elif item is not None:
                _buffer.append(item)
        # FIX: `except Exception, e:` is Python-2-only syntax; the rest of
        # the file uses the 2.6+/3.x-compatible `as` form
        except Exception as e:
            e = _Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    _Log.warning(
                        "`error_target` should not throw, just deal",
                        name=name,
                        cause=f)
            else:
                _Log.warning("Unexpected problem", name=name, cause=e)
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Blocks reading sys.stdin until "exit" is typed or please_stop goes.
    On Windows (msvcrt importable) delegates to _wait_for_exit_on_windows.
    """
    try:
        import msvcrt
        _wait_for_exit_on_windows(please_stop)
        return  # FIX: previously fell through into the stdin loop on Windows
    except Exception:  # FIX: was a bare except
        pass

    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        # DEBUG and Log.note("inside wait-for-shutdown loop")
        if cr_count > 30:
            # many consecutive blank lines: stdin is probably /dev/null, slow down
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # FIX: the wrapped exception was discarded; `"..." in e` on a raw
            # exception raises TypeError on Python 3
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
                break
        # DEBUG and Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count)
        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null
        if line.strip() == "exit":
            Log.alert("'exit' Detected! Stopping...")
            return
def _reader(self, name, pipe, receive, please_stop):
    # Copy lines from the child process `pipe` onto the `receive` queue until
    # the process exits or please_stop goes; then drain any residue and close.
    try:
        while not please_stop and self.service.returncode is None:
            line = to_text(pipe.readline().rstrip())
            if line:
                receive.add(line)
                self.debug and Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line)
            else:
                # empty read: process may be idle; avoid a tight spin
                (Till(seconds=1) | please_stop).wait()

        # GRAB A FEW MORE LINES
        # FIX: renamed `max` -> `remaining` (shadowed the builtin)
        remaining = 100
        while remaining:
            try:
                line = to_text(pipe.readline().rstrip())
                if line:
                    remaining = 100  # reset patience on every real line
                    receive.add(line)
                    self.debug and Log.note("{{process}} RESIDUE: ({{name}}): {{line}}", name=name, process=self.name, line=line)
                else:
                    remaining -= 1
            except Exception:
                break
    finally:
        pipe.close()
        # FIX: THREAD_STOP was added twice, leaving a stray stop sentinel
        # on the queue after the consumer stops
        receive.add(THREAD_STOP)
        self.debug and Log.note("{{process}} ({{name}} is closed)", name=name, process=self.name)
def worker(please_stop):
    """
    Poll the AWS metadata endpoint for a spot-termination notice until
    please_stop goes; signal please_stop when a notice appears, or return
    early when the endpoint is unreachable (not a spot node).
    """
    unreachable_signs = (
        "Failed to establish a new connection: [Errno 10060]",
        "A socket operation was attempted to an unreachable network",
    )
    while not please_stop:
        try:
            response = requests.get(
                "http://169.254.169.254/latest/meta-data/spot/termination-time"
            )
            if response.status_code not in [400, 404]:
                # anything other than "no notice" means shutdown is coming
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}",
                          name=machine_metadata.name,
                          type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as cause:
            cause = Except.wrap(cause)
            if any(sign in cause for sign in unreachable_signs):
                Log.note(
                    "AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)"
                )
                return
            Log.warning("AWS shutdown detection has problems", cause=cause)
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Blocks reading STDIN until b"exit" is typed or please_stop goes.
    On Windows (msvcrt importable) delegates to _wait_for_exit_on_windows.
    """
    try:
        import msvcrt
        _wait_for_exit_on_windows(please_stop)
        return
    except Exception:  # FIX: was a bare except
        pass

    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    try:
        while not please_stop:
            # DEBUG and Log.note("inside wait-for-shutdown loop")
            if cr_count > 30:
                # many consecutive blank reads: probably /dev/null, slow down
                (Till(seconds=3) | please_stop).wait()
            try:
                # line = ""
                line = STDIN.readline()
            except Exception as e:
                # FIX: the wrapped exception was discarded; `"..." in e` on a
                # raw OSError raises TypeError, so the interrupt fallback
                # below could never run
                e = Except.wrap(e)
                if "Bad file descriptor" in e:
                    Log.note("can not read from stdin")
                    _wait_for_interrupt(please_stop)
                    break
            # DEBUG and Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count)
            if not line:
                cr_count += 1
            else:
                cr_count = -1000000  # NOT /dev/null
            if line.strip() == b"exit":
                Log.alert("'exit' Detected! Stopping...")
                return
    except Exception as e:
        Log.warning("programming error", cause=e)
    finally:
        if please_stop:
            Log.note("please_stop has been requested")
        Log.note("done waiting for exit")
def _run(self):
    # Thread entry point: run self.target, record its result/exception on
    # self.end_of_thread, stop children, then wait briefly for a join before
    # reporting an unclaimed result.
    self.id = get_ident()
    with RegisterThread(self):
        try:
            if self.target is not None:
                a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
                self.end_of_thread.response = self.target(*a, **k)
        except Exception as e:
            e = Except.wrap(e)
            self.end_of_thread.exception = e
            with self.parent.child_locker:
                emit_problem = self not in self.parent.children
            if emit_problem:
                # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                try:
                    Log.error(
                        "Problem in thread {{name|quote}}", name=self.name, cause=e
                    )
                except Exception:
                    sys.stderr.write(
                        str("ERROR in thread: " + self.name + " " + text(e) + "\n")
                    )
        finally:
            try:
                # stop, then join, all child threads before finishing
                with self.child_locker:
                    children = copy(self.children)
                for c in children:
                    try:
                        DEBUG and Log.note("Stopping thread " + c.name + "\n")
                        c.stop()
                    except Exception as e:
                        Log.warning(
                            "Problem stopping thread {{thread}}",
                            thread=c.name,
                            cause=e,
                        )
                for c in children:
                    try:
                        DEBUG and Log.note("Joining on thread " + c.name + "\n")
                        c.join()
                    except Exception as e:
                        Log.warning(
                            "Problem joining thread {{thread}}",
                            thread=c.name,
                            cause=e,
                        )
                    finally:
                        DEBUG and Log.note("Joined on thread " + c.name + "\n")
                # release references so target/args can be garbage collected
                del self.target, self.args, self.kwargs
                DEBUG and Log.note("thread {{name|quote}} stopping", name=self.name)
            except Exception as e:
                DEBUG and Log.warning(
                    "problem with thread {{name|quote}}", cause=e, name=self.name
                )
            finally:
                if not self.ready_to_stop:
                    DEBUG and Log.note("thread {{name|quote}} is done, wait for join", name=self.name)
                    # WHERE DO WE PUT THE THREAD RESULT?
                    # IF NO THREAD JOINS WITH THIS, THEN WHAT DO WE DO WITH THE RESULT?
                    # HOW LONG DO WE WAIT FOR ANOTHER TO ACCEPT THE RESULT?
                    #
                    # WAIT 60seconds, THEN SEND RESULT TO LOGGER
                    (Till(seconds=60) | self.ready_to_stop).wait()
                self.stopped.go()
                if not self.ready_to_stop:
                    # FIX: this branch referenced `e`, which Python 3 unbinds
                    # at the end of each `except ... as e` block, raising
                    # NameError exactly when a failure had to be reported;
                    # use the exception stored on end_of_thread instead
                    failure = self.end_of_thread.exception
                    if failure:
                        # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                        try:
                            Log.error(
                                "Problem in thread {{name|quote}}", name=self.name, cause=failure
                            )
                        except Exception:
                            sys.stderr.write(
                                str("ERROR in thread: " + self.name + " " + text(failure) + "\n")
                            )
                    elif self.end_of_thread.response != None:
                        Log.warning(
                            "Thread {{thread}} returned a response, but was not joined with {{parent}} after 10min",
                            thread=self.name,
                            parent=self.parent.name
                        )
                    else:
                        # IF THREAD ENDS OK, AND NOTHING RETURNED, THEN FORGET ABOUT IT
                        self.parent.remove_child(self)
def worker_bee(self, batch_size, period, error_target, please_stop):
    # Drain this (fast) queue into self.slow_queue in batches of up to
    # batch_size, flushing at least every `period` seconds. Callables popped
    # from the queue are deferred and executed after the next flush;
    # THREAD_STOP flushes, signals please_stop, and terminates.
    please_stop.then(lambda: self.add(THREAD_STOP))
    _buffer = []                  # items awaiting the next flush
    _post_push_functions = []     # callables to run right after a flush
    now = time()
    next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
    last_push = now - period

    def push_to_queue():
        # flush _buffer to the slow queue, then run the deferred callbacks
        if self.slow_queue.__class__.__name__ == "Index":
            # NOTE(review): duck-typed special case for an Elasticsearch-style
            # Index sink — confirm `settings.index` is always present there
            if self.slow_queue.settings.index.startswith("saved"):
                Log.alert("INSERT SAVED QUERY {{data|json}}", data=copy(_buffer))
        self.slow_queue.extend(_buffer)
        del _buffer[:]
        for ppf in _post_push_functions:
            ppf()
        del _post_push_functions[:]

    while not please_stop:
        try:
            if not _buffer:
                # nothing pending: block indefinitely for the next item
                item = self.pop()
                now = time()
                if now > last_push + period:
                    next_push = Till(till=now + period)
            else:
                # items pending: wait only until the scheduled push time
                item = self.pop(till=next_push)
                now = time()

            if item is THREAD_STOP:
                push_to_queue()
                please_stop.go()
                break
            elif isinstance(item, types.FunctionType):
                _post_push_functions.append(item)
            elif item is not None:
                _buffer.append(item)
        except Exception as e:
            e = Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    Log.warning(
                        "`error_target` should not throw, just deal",
                        name=self.name,
                        cause=f)
            else:
                Log.warning("Unexpected problem", name=self.name, cause=e)

        try:
            # flush when the batch is big enough, or the push deadline passed
            if len(_buffer) >= batch_size or next_push:
                if _buffer:
                    push_to_queue()
                    last_push = now = time()
                next_push = Till(till=now + period)
        except Exception as e:
            e = Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    Log.warning(
                        "`error_target` should not throw, just deal",
                        name=self.name,
                        cause=f)
            else:
                Log.warning(
                    "Problem with {{name}} pushing {{num}} items to data sink",
                    name=self.name,
                    num=len(_buffer),
                    cause=e)

    if _buffer:
        # ONE LAST PUSH, DO NOT HAVE TIME TO DEAL WITH ERRORS
        push_to_queue()
    # propagate the stop sentinel downstream
    self.slow_queue.add(THREAD_STOP)
def worker_bee(please_stop):
    # Closure worker: drain `self` into the slower `queue` in batches of up
    # to batch_size, flushing at least every `period` seconds (batch_size,
    # period, queue, name, error_target come from the enclosing scope).
    # Callables popped from the queue are deferred and run after the next
    # flush; THREAD_STOP flushes, signals please_stop, and terminates.
    please_stop.on_go(lambda: self.add(THREAD_STOP))
    _buffer = []                  # items awaiting the next flush
    _post_push_functions = []     # callables to run right after a flush
    now = time()
    next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
    last_push = now - period

    def push_to_queue():
        # flush the batch, then run (and clear) the deferred callbacks
        queue.extend(_buffer)
        del _buffer[:]
        for ppf in _post_push_functions:
            ppf()
        del _post_push_functions[:]

    while not please_stop:
        try:
            if not _buffer:
                # nothing pending: block indefinitely for the next item
                item = self.pop()
                now = time()
                if now > last_push + period:
                    # Log.note("delay next push")
                    next_push = Till(till=now + period)
            else:
                # items pending: wait only until the scheduled push time
                item = self.pop(till=next_push)
                now = time()

            if item is THREAD_STOP:
                push_to_queue()
                please_stop.go()
                break
            elif isinstance(item, types.FunctionType):
                _post_push_functions.append(item)
            elif item is not None:
                _buffer.append(item)
        except Exception as e:
            e = Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    Log.warning(
                        "`error_target` should not throw, just deal",
                        name=name,
                        cause=f)
            else:
                Log.warning("Unexpected problem", name=name, cause=e)

        try:
            # flush when the batch is big enough, or the push deadline passed
            if len(_buffer) >= batch_size or next_push:
                if _buffer:
                    push_to_queue()
                    last_push = now = time()
                next_push = Till(till=now + period)
        except Exception as e:
            e = Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    Log.warning(
                        "`error_target` should not throw, just deal",
                        name=name,
                        cause=f)
            else:
                Log.warning(
                    "Problem with {{name}} pushing {{num}} items to data sink",
                    name=name,
                    num=len(_buffer),
                    cause=e)

    if _buffer:
        # ONE LAST PUSH, DO NOT HAVE TIME TO DEAL WITH ERRORS
        push_to_queue()
def timeout(please_stop):
    """Force please_stop after at most 20 seconds (or sooner if already going)."""
    deadline = Till(seconds=20)
    (deadline | please_stop).wait()
    please_stop.go()
def __init__(
    self,
    name,
    queue,  # THE SLOWER QUEUE
    batch_size=None,  # THE MAX SIZE OF BATCHES SENT TO THE SLOW QUEUE
    max_size=None,  # SET THE MAXIMUM SIZE OF THE QUEUE, WRITERS WILL BLOCK IF QUEUE IS OVER THIS LIMIT
    period=None,  # MAX TIME (IN SECONDS) BETWEEN FLUSHES TO SLOWER QUEUE
    silent=False,  # WRITES WILL COMPLAIN IF THEY ARE WAITING TOO LONG
    error_target=None  # CALL THIS WITH ERROR **AND THE LIST OF OBJECTS ATTEMPTED**
    # BE CAREFUL! THE THREAD MAKING THE CALL WILL NOT BE YOUR OWN!
    # DEFAULT BEHAVIOUR: THIS WILL KEEP RETRYING WITH WARNINGS
):
    # NOTE(review): this block uses Python-2-only `except Exception, e:`
    # syntax throughout, and appears truncated: the final except has no
    # sibling else-branch and no worker thread is ever started — verify
    # against the canonical version before use
    if not _Log:
        _late_import()
    batch_size = coalesce(batch_size, int(max_size / 2) if max_size else None, 900)
    max_size = coalesce(max_size, batch_size * 2)  # REASONABLE DEFAULT
    period = coalesce(period, 1)  # SECONDS
    Queue.__init__(self, name=name, max=max_size, silent=silent)

    def worker_bee(please_stop):
        # drain this queue into the slower `queue` in batches
        def stopper():
            self.add(THREAD_STOP)
        please_stop.on_go(stopper)

        _buffer = []                  # items awaiting the next flush
        _post_push_functions = []     # callables to run right after a flush
        now = time()
        next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
        last_push = now - period

        def push_to_queue():
            # flush the batch, then run (and clear) the deferred callbacks
            queue.extend(_buffer)
            del _buffer[:]
            for f in _post_push_functions:
                f()
            del _post_push_functions[:]

        while not please_stop:
            try:
                if not _buffer:
                    # nothing pending: block indefinitely for the next item
                    item = self.pop()
                    now = time()
                    if now > last_push + period:
                        # _Log.note("delay next push")
                        next_push = Till(till=now + period)
                else:
                    # items pending: wait only until the scheduled push time
                    item = self.pop(till=next_push)
                    now = time()

                if item is THREAD_STOP:
                    push_to_queue()
                    please_stop.go()
                    break
                elif isinstance(item, types.FunctionType):
                    _post_push_functions.append(item)
                elif item is not None:
                    _buffer.append(item)
            except Exception, e:
                e = _Except.wrap(e)
                if error_target:
                    try:
                        error_target(e, _buffer)
                    except Exception, f:
                        _Log.warning(
                            "`error_target` should not throw, just deal",
                            name=name,
                            cause=f)
                else:
                    _Log.warning("Unexpected problem", name=name, cause=e)

            try:
                # flush when the batch is big enough, or the push deadline passed
                if len(_buffer) >= batch_size or next_push:
                    # NOTE(review): next_push is reset BEFORE the flush here,
                    # using the possibly-stale `now` — later versions reset it
                    # after; confirm which timing is wanted
                    next_push = Till(till=now + period)
                    if _buffer:
                        push_to_queue()
                        last_push = now = time()
            except Exception, e:
                e = _Except.wrap(e)
                if error_target:
                    try:
                        error_target(e, _buffer)
                    except Exception, f:
                        _Log.warning(
                            "`error_target` should not throw, just deal",
                            name=name,
                            cause=f)
def timeout(please_stop):
    """Give shutdown one second to complete, then stop the main thread."""
    grace_period = Till(seconds=1)
    (grace_period | please_stop).wait()
    stop_main_thread()