def start(self):
    """Run the master loop until asked to stop, then shut the workers down.

    Selects the ``spawn`` multiprocessing start method, installs handlers
    for SIGINT, SIGTERM and SIGUSR1, and supervises the workers for as long
    as ``self._running`` stays truthy. Afterwards every worker is asked to
    stop; workers that are still registered once ``self._force_stop`` is
    set are killed.
    """
    multiprocessing.set_start_method('spawn')

    stop_handler = self._handle_signal_stop
    signal.signal(signal.SIGINT, stop_handler)
    signal.signal(signal.SIGTERM, stop_handler)
    signal.signal(signal.SIGUSR1, self._handle_signal_usr1)

    def reap_and_wait():
        # Tail shared by the supervision loop and the shutdown loop.
        self._check_hanging()
        self._remove_workers()
        time.sleep(0.25)

    # Supervision phase. NOTE(review): presumably the stop signal handler
    # clears self._running -- it is not visible from this method.
    self._running = True
    while self._running:
        self._start_workers()
        self._process_queue()
        reap_and_wait()

    # Graceful phase: ask every worker to stop, then wait for them to go.
    print('\nGently asking workers to stop after their current job ...')
    for info in self._workers.values():
        info.stop()

    while True:
        if self._force_stop or not self._workers:
            break
        alive = self._get_running_workers_str()
        print('{} workers still alive: {}'.format(len(self._workers), alive))
        reap_and_wait()

    # Forced phase: only reached with workers left if a force stop was set.
    if self._workers:
        print('Forcefully killing workers ...')
        for info in self._workers.values():
            kill_everything(info.pid)

    print('All workers stopped. Shutting down ...')
def run(self):
    """Fetch and execute scan jobs until the execution budget is used up.

    The loop ends early when the parent process id no longer matches
    ``self._ppid`` or when ``self._stop_event`` is set. Each job runs
    inside a fresh temporary working directory; its outcome is reported to
    the job queue and announced to the master. When the loop is left,
    ``kill_everything`` is called on the worker's own pid.
    """
    while self._max_executions > 0:
        # Quit if we were re-parented (master gone) or told to stop.
        if self._ppid != os.getppid() or self._stop_event.is_set():
            break

        job = self._job_queue.get_job_nowait()
        if job is None:
            # Nothing queued right now; poll again in a second.
            time.sleep(1)
            continue

        module = job.scan_module
        self._notify_master(
            'job_started',
            (job.scan_id, module.name, datetime.today(), job.num_tries))

        result = Result(job.current_result, NoOpFileHandler())

        # A dedicated logger per job, wired to the master pipe and a stream.
        logger = logging.Logger(module.name)
        pipe_handler = WorkerWritePipeHandler(self._pid, self._write_pipe)
        logger.addHandler(pipe_handler)
        stream_handler = ScanStreamHandler()
        logger.addHandler(stream_handler)

        scan_meta = ScanMeta(worker_id=self._id, num_tries=job.num_tries)

        def fail_job():
            # Report the failure to the queue, then tell the master.
            self._job_queue.report_failure()
            self._notify_master('job_failed', (datetime.today(), ))

        def finish_job():
            # Hand the collected updates to the queue, then tell the master.
            self._job_queue.report_result(result.get_updates())
            self._notify_master('job_finished', (datetime.today(), ))

        with tempfile.TemporaryDirectory() as scratch_dir:
            previous_cwd = os.getcwd()
            os.chdir(scratch_dir)
            try:
                module.logger = logger
                module.scan_site(result, scan_meta)
            except RetryScan:
                fail_job()
            except RescheduleLater as later:
                # Reschedule first, but still deliver what was gathered.
                self._job_queue.reschedule(later.not_before)
                finish_job()
            except Exception:
                logger.exception('Scan module `%s` failed.', module.name)
                fail_job()
                if self._raven_client:
                    self._raven_client.captureException(
                        tags={
                            'scan_id': job.scan_id,
                            'scan_module_name': module.name
                        },
                        extra={'result': result.get_results()})
            else:
                finish_job()
            finally:
                # Leave the scratch directory before it gets removed.
                os.chdir(previous_cwd)
                # NOTE(review): presumably reaps processes the scan module
                # spawned -- kill_everything is defined elsewhere.
                kill_everything(self._pid, only_children=True)

        self._max_executions -= 1

    kill_everything(self._pid)
def _check_hanging(self):
    """Kill workers whose current job exceeded its allowed execution time.

    The limit is looked up per scan module in ``self.max_execution_times``
    and falls back to ``self.max_execution_time``. A limit of ``None``
    means the worker is not checked. For an overdue worker the job is
    flagged as failed, the worker is killed and its pid is recorded in
    ``self._terminated_worker_pids``.
    """
    for worker in self._workers.values():
        limit = self.max_execution_times.get(
            worker.scan_module, self.max_execution_time)
        if limit is not None and worker.get_execution_time() > limit:
            worker.notify_job_failed()
            self._event_job_failed(worker.scan_id, worker.scan_module)
            kill_everything(worker.pid)
            self._terminated_worker_pids.add(worker.pid)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Kill the managed process and clean up the temporary directory.

    Args:
        exc_type: Exception class raised inside the ``with`` body, or None.
        exc_val: Exception instance raised inside the ``with`` body, or None.
        exc_tb: Traceback of that exception, or None.

    Returns:
        None, so an exception raised inside the ``with`` body is not
        suppressed.
    """
    try:
        kill_everything(self._p.pid)
    finally:
        # Clean up even when killing fails, so the temporary directory
        # is not left behind.
        self._temp_dir.cleanup()