def _handle_recv_back(self, msg):
    """Run the job carried by *msg* and build the prefixed result string.

    ``msg[0]`` is deserialised with ``Job.load_from_string`` and handed to
    ``self.target``.  The outcome is formatted as ``<pid>:OK:<result>`` on
    success, or as ``<pid>:ERROR:<exception text>`` followed by the
    formatted traceback lines on failure; failures are also logged.

    When ``self.debug`` is true the target is wrapped with ``timed()`` and
    its return value is unpacked as a ``(duration, result)`` pair.
    """
    # do the job and send the result
    if self.debug:
        logger.debug('Job received')
        target = timed()(self.target)
    else:
        target = self.target

    duration = -1

    # results are sent with a PID:OK: or a PID:ERROR prefix
    try:
        with self.timer.run_job():
            res = target(Job.load_from_string(msg[0]))

        # did we time out?
        if self.timer.timed_out:
            # log the last dump captured by the timer
            for line in self.timer.last_dump:
                logger.error(line)

        if self.debug:
            duration, res = res
        res = '%d:OK:%s' % (self.pid, res)
    except Exception as e:
        # a failing job must not kill the worker: report it as PID:ERROR
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        res = '%d:ERROR:%s' % (self.pid, '\n'.join(exc))
        logger.error(res)

    # NOTE(review): ``res`` and ``duration`` are not consumed after this
    # point in the visible code -- confirm where the reply is actually sent.
def _handle_recv_back(self, msg):
    """Forward *msg* to the front stream (back => front direction).

    The message is passed unchanged to ``self._frontstream.send_multipart``.
    Any exception raised by the send is logged -- exception text first,
    then the formatted traceback -- and swallowed, so a failed send never
    propagates to the caller.
    """
    # back => front
    logger.debug('front <- back')
    try:
        self._frontstream.send_multipart(msg)
    except Exception as e:
        # we don't want to die on error. we just log it
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        logger.error('\n'.join(exc))
def _handle_recv_front(self, msg, tentative=0):
    """Route a front-end message to a randomly chosen worker.

    * If the last frame of *msg* is ``'PING'`` the message is answered on
      the front stream with that frame replaced by this process' pid
      (health check) and nothing else happens.
    * Otherwise a worker id is drawn at random from ``self._workers``;
      ids for which ``self._check_worker`` returns a false value are
      passed to ``self._remove_worker`` and another one is drawn.
    * If no worker is left, the call is re-scheduled on ``self.loop``
      after ``0.5 + tentative * 0.2`` seconds with ``tentative + 1``.
      Once *tentative* reaches 3 a ``<pid>:ERROR:No worker`` frame is sent
      back on the front stream instead.
    * When a worker is found, ``self._worker_times[worker_id]`` is set to
      ``(now, None)`` and the message, prefixed with the worker id, is
      sent on the back stream.  Send errors are logged and swallowed.

    *msg* itself is never modified.
    """
    # front => back
    # if the last part of the message is 'PING', we just PONG back
    # this is used as a health check
    if msg[-1] == 'PING':
        self._frontstream.send_multipart(msg[:-1] + [str(os.getpid())])
        return

    # ``>=`` so that a caller starting above 3 still gets the error reply
    # instead of being re-scheduled forever
    if tentative >= 3:
        logger.debug('No workers')
        self._frontstream.send_multipart(
            msg[:-1] + ['%d:ERROR:No worker' % os.getpid()])
        return

    # we want to decide who's going to do the work
    while self._workers:
        worker_id = random.choice(self._workers)
        if self._check_worker(worker_id):
            break
        self._remove_worker(worker_id)
    else:
        # the loop ended without ``break``: no usable worker is left
        logger.debug('No worker, will try later')
        later = time.time() + 0.5 + (tentative * 0.2)
        self.loop.add_timeout(
            later, lambda: self._handle_recv_front(msg, tentative + 1))
        return

    # start the timer
    self._worker_times[worker_id] = time.time(), None

    # now we can send to the right guy; build a new list rather than
    # inserting into the caller's one
    try:
        self._backstream.send_multipart([worker_id] + msg)
    except Exception as e:
        # we don't want to die on error. we just log it
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        logger.error('\n'.join(exc))
def _handle_recv_back(self, msg):
    """Run the job carried by *msg*, reporting progress through riemann.

    ``msg[0]`` is deserialised with ``Job.load_from_string`` and handed to
    ``self.target``.  The outcome is formatted as ``<pid>:OK:<result>`` on
    success, or as ``<pid>:ERROR:<exception text>`` followed by the
    formatted traceback lines on failure; failures are also logged.

    An event dict is sent through ``self.riemann`` before the job runs
    (state ``ok``, tag ``running``) and again, with state ``error`` and tag
    ``timeouts``, when ``self.timer.timed_out`` is set afterwards.

    When ``self.debug`` is true the target is wrapped with ``timed()`` and
    its return value is unpacked as a ``(duration, result)`` pair.
    """
    # do the job and send the result
    if self.debug:
        logger.debug("Job received")
        target = timed()(self.target)
    else:
        target = self.target

    duration = -1

    # results are sent with a PID:OK: or a PID:ERROR prefix
    riemann_message = {
        "host": socket.gethostname(),
        "service": "powerhose-worker",
        "metric": 1,
        "state": "ok",
        "tags": ["running"],
    }

    try:
        with self.timer.run_job():
            self.riemann.send(riemann_message)
            res = target(Job.load_from_string(msg[0]))

        # did we time out?
        if self.timer.timed_out:
            riemann_message["state"] = "error"
            riemann_message["tags"] = ["timeouts"]
            self.riemann.send(riemann_message)
            # log the last dump captured by the timer
            for line in self.timer.last_dump:
                logger.error(line)

        if self.debug:
            duration, res = res
        res = "%d:OK:%s" % (self.pid, res)
    except Exception as e:
        # a failing job must not kill the worker: report it as PID:ERROR
        riemann_message["description"] = str(e)
        riemann_message["state"] = "error"
        riemann_message["tags"] = ["faults"]
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        res = "%d:ERROR:%s" % (self.pid, "\n".join(exc))
        logger.error(res)

    # NOTE(review): the "faults" event prepared above is not sent, and
    # ``res`` / ``duration`` are not consumed, anywhere in the visible
    # code -- confirm where the event and the reply are actually sent.
def _handle_recv_front(self, msg):
    """Forward *msg* to the back stream, answering health checks locally.

    If the last frame of *msg* is ``'PING'`` the message is answered on
    the front stream with that frame replaced by this process' pid, and
    nothing is forwarded.  Otherwise *msg* is passed unchanged to
    ``self._backstream.send_multipart``; any exception raised by that send
    is logged (exception text, then the formatted traceback) and swallowed.
    """
    # front => back
    logger.debug('front -> back')

    # if the last part of the message is 'PING', we just PONG back
    # this is used as a health check
    if msg[-1] == 'PING':
        self._frontstream.send_multipart(msg[:-1] + [str(os.getpid())])
        return

    try:
        self._backstream.send_multipart(msg)
    except Exception as e:
        # we don't want to die on error. we just log it
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        logger.error('\n'.join(exc))
def _handle_recv_back(self, msg):
    """Forward a worker reply to the front stream and record its end time.

    The first frame of *msg* is taken as the worker id and dropped; the
    remaining frames are sent on ``self._frontstream``.  Before sending,
    ``self._worker_times[worker_id]`` is updated to ``(start, now)``,
    keeping the start time already stored for that worker, or to
    ``(now, now)`` when the worker has no entry yet.

    Any exception raised by the send is logged (exception text, then the
    formatted traceback) and swallowed.
    """
    # back => front
    # let's remove the worker id and track the time it took
    worker_id, reply = msg[0], msg[1:]

    now = time.time()
    try:
        started = self._worker_times[worker_id][0]
    except KeyError:
        # reply from a worker we never timed: use a zero-length interval
        started = now
    self._worker_times[worker_id] = started, now

    try:
        self._frontstream.send_multipart(reply)
    except Exception as e:
        # we don't want to die on error. we just log it
        exc = traceback.format_tb(sys.exc_info()[2])
        exc.insert(0, str(e))
        logger.error('\n'.join(exc))