def _StartWorkerProcess(self, process_name): """Creates, starts, monitors and registers a worker process. Args: process_name (str): process name. Returns: MultiProcessWorkerProcess: extraction worker process or None on error. """ analysis_plugin = self._analysis_plugins.get(process_name, None) if not analysis_plugin: logger.error('Missing analysis plugin: {0:s}'.format(process_name)) return None queue_name = '{0:s} output event queue'.format(process_name) output_event_queue = zeromq_queue.ZeroMQPushBindQueue( name=queue_name, timeout_seconds=self._QUEUE_TIMEOUT) # Open the queue so it can bind to a random port, and we can get the # port number to use in the input queue. output_event_queue.Open() self._event_queues[process_name] = output_event_queue queue_name = '{0:s} input event queue'.format(process_name) input_event_queue = zeromq_queue.ZeroMQPullConnectQueue( name=queue_name, delay_open=True, port=output_event_queue.port, timeout_seconds=self._QUEUE_TIMEOUT) process = analysis_process.AnalysisProcess( input_event_queue, self._knowledge_base, self._session, analysis_plugin, self._processing_configuration, data_location=self._data_location, event_filter_expression=self._event_filter_expression, name=process_name) process.start() logger.info('Started analysis plugin: {0:s} (PID: {1:d}).'.format( process_name, process.pid)) try: self._StartMonitoringProcess(process) except (IOError, KeyError) as exception: logger.error( ('Unable to monitor analysis plugin: {0:s} (PID: {1:d}) ' 'with error: {2!s}').format(process_name, process.pid, exception)) process.terminate() return None self._RegisterProcess(process) return process
def _StartAnalysisProcesses(self, analysis_plugins): """Starts the analysis processes. Args: analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that should be run and their names. """ logger.info('Starting analysis plugins.') for analysis_plugin in analysis_plugins.values(): self._analysis_plugins[analysis_plugin.NAME] = analysis_plugin process = self._StartWorkerProcess(analysis_plugin.NAME) if not process: logger.error('Unable to create analysis process: {0:s}'.format( analysis_plugin.NAME)) logger.info('Analysis plugins running')
def _CheckStatusWorkerProcess(self, pid): """Checks the status of a worker process. If a worker process is not responding the process is terminated and a replacement process is started. Args: pid (int): process ID (PID) of a registered worker process. Raises: KeyError: if the process is not registered with the engine. """ # TODO: Refactor this method, simplify and separate concerns (monitoring # vs management). self._RaiseIfNotRegistered(pid) process = self._processes_per_pid[pid] process_status = self._QueryProcessStatus(process) if process_status is None: process_is_alive = False else: process_is_alive = True process_information = self._process_information_per_pid[pid] used_memory = process_information.GetUsedMemory() or 0 if self._worker_memory_limit and used_memory > self._worker_memory_limit: logger.warning(( 'Process: {0:s} (PID: {1:d}) killed because it exceeded the ' 'memory limit: {2:d}.').format( process.name, pid, self._worker_memory_limit)) self._KillProcess(pid) if isinstance(process_status, dict): self._rpc_errors_per_pid[pid] = 0 status_indicator = process_status.get('processing_status', None) else: rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1 self._rpc_errors_per_pid[pid] = rpc_errors if rpc_errors > self._MAXIMUM_RPC_ERRORS: process_is_alive = False if process_is_alive: rpc_port = process.rpc_port.value logger.warning(( 'Unable to retrieve process: {0:s} (PID: {1:d}) status via ' 'RPC socket: http://localhost:{2:d}').format( process.name, pid, rpc_port)) processing_status_string = 'RPC error' status_indicator = definitions.STATUS_INDICATOR_RUNNING else: processing_status_string = 'killed' status_indicator = definitions.STATUS_INDICATOR_KILLED process_status = { 'processing_status': processing_status_string} self._UpdateProcessingStatus(pid, process_status, used_memory) # _UpdateProcessingStatus can also change the status of the worker, # So refresh the status if applicable. for worker_status in self._processing_status.workers_status: if worker_status.pid == pid: status_indicator = worker_status.status break if status_indicator in definitions.ERROR_STATUS_INDICATORS: logger.error(( 'Process {0:s} (PID: {1:d}) is not functioning correctly. ' 'Status code: {2!s}.').format(process.name, pid, status_indicator)) self._TerminateProcessByPid(pid) replacement_process = None for replacement_process_attempt in range( self._MAXIMUM_REPLACEMENT_RETRIES): logger.info(( 'Attempt: {0:d} to start replacement worker process for ' '{1:s}').format(replacement_process_attempt + 1, process.name)) replacement_process = self._StartWorkerProcess(process.name) if replacement_process: break time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY) if not replacement_process: logger.error( 'Unable to create replacement worker process for: {0:s}'.format( process.name))