async def _apply_archiver(
    self, archiver: ArchiverPlugin, payload: Payload, request: Request
) -> None:
    """Archive a single payload with one destination archiver plugin.

    Any exception raised by the plugin is logged and recorded as an
    ``Error`` on the request rather than propagated. The plugin is always
    recorded in the payload's ``plugins_run`` bookkeeping, and any errors
    or results returned by the archiver are merged into the request and
    payload results respectively.
    """
    self.log.debug(
        f'Archiving {payload.results.payload_id} with {archiver.plugin_name}'
    )
    response: Optional[ArchiverResponse] = None
    try:
        response = await archiver.archive(payload, request)
    except Exception as e:
        msg = 'archiver:failed to archive'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=archiver.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
    # The archiver counts as "run" even when it raised
    payload.results.plugins_run['archivers'].append(archiver.plugin_name)
    if not response:
        return
    if response.errors is not None:
        request.errors.extend(response.errors)
    if response.results is not None:
        payload.results.archivers[archiver.plugin_name] = response.results
def _resolve_dependencies(
    self, total_dispatches: Set[Tuple[Payload, str]], request: Request
) -> Tuple[Set[Tuple[Payload, WorkerPlugin]], Set[Tuple[Payload, str]]]:
    """Resolve required-plugin dependencies for every pending dispatch.

    For each (payload, plugin name) pair, expands the plugin's dependency
    chain into the set of plugins that can run now and the set that must
    be deferred. Dependency-resolution failures are recorded as ``Error``
    entries on the request and that dispatch is skipped.
    """
    runnable: Set[Tuple[Payload, WorkerPlugin]] = set()
    waiting: Set[Tuple[Payload, str]] = set()
    for payload, plugin_name in total_dispatches:
        try:
            can_run, deferred = self._resolve_plugin_dependencies(
                payload, plugin_name, request, set()
            )
        except RuntimeError as e:
            # RecursionError (circular or over-deep dependency chains) is
            # a subclass of RuntimeError, so both are caught here
            self.log.exception(e)
            request.errors.append(
                Error(
                    payload_id=payload.results.payload_id,
                    plugin_name=plugin_name,
                    error=helpers.format_exc(e),
                )
            )
            continue
        runnable |= can_run
        waiting |= deferred
    return runnable, waiting
async def _apply_dispatcher(
    self, dispatcher: DispatcherPlugin, payload: Payload, request: Request
) -> Set[str]:
    """Ask one dispatcher plugin which worker plugins should scan a payload.

    Returns the set of worker plugin names the dispatcher requested. A
    dispatcher exception is logged, recorded as an ``Error`` on the
    request, and yields an empty set. Dispatcher-provided metadata, if
    any, is stashed on the payload's ``dispatch_meta``.
    """
    self.log.debug(
        f'Sending {payload.results.payload_id} to dispatcher ({dispatcher.plugin_name})'
    )
    plugin_names: Set[str] = set()
    try:
        result = await dispatcher.get_dispatches(payload, request)
    except Exception as e:
        msg = 'dispatcher:failed to dispatch'
        self.log.exception(msg)
        request.errors.append(
            Error(
                plugin_name=dispatcher.plugin_name,
                error=helpers.format_exc(e, msg=msg),
                payload_id=payload.results.payload_id,
            )
        )
        return plugin_names
    if not result:
        return plugin_names
    if result.plugin_names is not None:
        plugin_names.update(result.plugin_names)
        self.log.debug(
            f'Dispatching {payload.results.payload_id} to {plugin_names}'
        )
    if result.meta is not None:
        payload.dispatch_meta[dispatcher.plugin_name] = result.meta
    return plugin_names
async def scan(
    self, payload: Payload, request: Request
) -> Optional[WorkerResponse]:
    """Test worker: returns a canned result plus one extracted payload.

    Behavior is driven by class-level knobs: ``RAISE_EXCEPTION`` raises,
    ``EXTRACTED_PAYLOAD``/``SHOULD_SCAN``/``EXTRACTED_DISPATCH_TO`` shape
    the extracted payload, ``RETURN_ERRORS`` attaches a test error, and
    ``ADDITIONAL_DISPATCH_TO`` requests extra worker dispatches.
    """
    if self.RAISE_EXCEPTION:
        raise Exception('Test exception please ignore')
    content = self.EXTRACTED_PAYLOAD or b'Lorem ipsum'
    meta = PayloadMeta(
        should_scan=self.SHOULD_SCAN, dispatch_to=self.EXTRACTED_DISPATCH_TO
    )
    extracted = ExtractedPayload(content, meta)
    response = WorkerResponse({"valuable_insight": "wow"}, extracted=[extracted])
    if self.RETURN_ERRORS:
        response.errors.append(
            Error(
                plugin_name="simple_worker",
                error="Test error please ignore",
                payload_id=payload.results.payload_id,
            )
        )
    if self.ADDITIONAL_DISPATCH_TO:
        response.dispatch_to.extend(self.ADDITIONAL_DISPATCH_TO)
    return response
async def _apply_connector(
    self, connector: ConnectorPlugin, response: StoqResponse
) -> None:
    """Save a completed ``StoqResponse`` with one connector plugin.

    Exceptions raised by the connector are logged and recorded as an
    ``Error`` on the response rather than propagated, so one failing
    connector does not prevent other connectors from saving.
    """
    self.log.debug(f'Saving results to connector {connector.plugin_name}')
    try:
        await connector.save(response)
    except Exception as e:
        msg = f'Failed to save results using {connector.__module__}'
        self.log.exception(msg)
        error = Error(
            plugin_name=connector.plugin_name,
            error=helpers.format_exc(e, msg=msg),
        )
        # Bug fix: the Error was previously constructed but never
        # recorded anywhere, silently dropping connector failures.
        # Matches the error-recording behavior of _apply_decorator.
        response.errors.append(error)
async def decorate(self, response: StoqResponse) -> Optional[DecoratorResponse]:
    """Test decorator: returns a fixed decoration.

    ``RAISE_EXCEPTION`` makes it raise instead; ``RETURN_ERRORS`` attaches
    a test error to the returned response.
    """
    if self.RAISE_EXCEPTION:
        raise Exception('Test exception please ignore')
    result = DecoratorResponse({'simple_decoration': 123})
    if self.RETURN_ERRORS:
        result.errors.append(
            Error(plugin_name='simple_decorator', error='Test error please ignore')
        )
    return result
async def _apply_worker(
    self, payload: Payload, plugin: WorkerPlugin, request: Request
) -> Tuple[Set[Tuple[Payload, str]], List[Payload]]:
    """Scan one payload with one worker plugin.

    Returns ``(additional_dispatches, extracted_payloads)``: worker
    dispatches the plugin requested for this payload, and new ``Payload``
    objects built from what the plugin extracted. A plugin exception is
    logged and recorded as an ``Error`` on the request, yielding an empty
    result. The plugin is always recorded in ``plugins_run``.
    """
    self.log.debug(
        f'Scanning Payload {payload.results.payload_id} with WorkerPlugin {plugin.plugin_name}'
    )
    worker_response: Optional[WorkerResponse]
    try:
        worker_response = await plugin.scan(payload, request)
    except Exception as e:
        worker_response = None
        msg = 'worker:failed to scan'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=plugin.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
    # Record the run regardless of success or failure
    payload.results.plugins_run['workers'].append(plugin.plugin_name)
    if not worker_response:
        return set(), []
    if worker_response.results is not None:
        payload.results.workers[plugin.plugin_name] = worker_response.results
    request.errors.extend(worker_response.errors)
    # Dispatches the worker requested for this same payload
    additional_dispatches: Set[Tuple[Payload, str]] = set()
    for requested_plugin in worker_response.dispatch_to:
        additional_dispatches.add((payload, requested_plugin))
    # Promote extracted content to full Payload objects, tracking lineage
    extracted_payloads: List[Payload] = []
    for item in worker_response.extracted:
        extracted_payloads.append(
            Payload(
                content=item.content,
                payload_meta=item.payload_meta,
                extracted_by=plugin.plugin_name,
                extracted_from=payload.results.payload_id,
            )
        )
    self.log.debug(
        f'Completed scan of {payload.results.payload_id} with '
        f'{len(worker_response.results) if worker_response.results else 0} result keys, '  # type: ignore
        f'{len(additional_dispatches)} additional dispatches, and '
        f'{len(extracted_payloads)} extracted payloads'
    )
    return additional_dispatches, extracted_payloads
async def _apply_decorator(
    self, decorator: DecoratorPlugin, response: StoqResponse
) -> StoqResponse:
    """Mutates the given StoqResponse object to include decorator information"""
    self.log.debug(f'Applying decorator {decorator.plugin_name}')
    try:
        result = await decorator.decorate(response)
    except Exception as e:
        msg = 'decorator'
        self.log.exception(msg)
        response.errors.append(
            Error(
                plugin_name=decorator.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
        return response
    if result is None:
        return response
    if result.results is not None:
        response.decorators[decorator.plugin_name] = result.results
    if result.errors:
        response.errors.extend(result.errors)
    return response
def _resolve_plugin_dependencies(
    self,
    payload: Payload,
    plugin_name: str,
    request: Request,
    init_plugin_dependency_chain: Set[str],
    depth: int = 0,
) -> Tuple[Set[Tuple[Payload, WorkerPlugin]], Set[Tuple[Payload, str]]]:
    """Recursively resolve a worker plugin and its required workers.

    Walks ``plugin.required_workers`` depth-first, returning a tuple of
    (plugins that can run now as ``(payload, WorkerPlugin)`` pairs,
    plugins that must be deferred as ``(payload, plugin_name)`` pairs).

    :param payload: Payload the plugin(s) would scan
    :param plugin_name: Worker plugin to resolve
    :param request: Request that collects any load/resolution errors
    :param init_plugin_dependency_chain: Plugin names already on the
        current dependency path, used to detect circular requirements
    :param depth: Current recursion depth, capped by
        ``self.max_required_worker_depth``
    :raises RecursionError: On a circular dependency or when the maximum
        required-plugin depth is exceeded
    """
    # Seeing the same plugin twice on one dependency path means a cycle
    if plugin_name in init_plugin_dependency_chain:
        raise RecursionError(
            'Circular required plugin dependency found, '
            f'unable to process plugin {plugin_name}'
        )
    # Hard cap on dependency-chain depth, independent of cycle detection
    if depth > self.max_required_worker_depth:
        raise RecursionError(
            f'Max required plugin depth {self.max_required_worker_depth} reached, '
            'unable to generate additional tasks'
        )
    try:
        plugin: WorkerPlugin = self.load_plugin(plugin_name)  # type: ignore
    except Exception as e:
        # A plugin that fails to load is recorded as an error and
        # contributes nothing, rather than aborting the whole resolution
        msg = f'Worker plugin {plugin_name} failed to load'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
        return set(), set()
    # Skip plugins that already ran against this payload
    if plugin_name in payload.results.plugins_run['workers']:
        return set(), set()
    can_run: Set[Tuple[Payload, WorkerPlugin]] = set()
    deferred: Set[Tuple[Payload, str]] = set()
    if self._plugin_can_run(payload, plugin):
        can_run.add((payload, plugin))
    else:
        deferred.add((payload, plugin_name))
    if len(plugin.required_workers) != 0:
        self.log.debug(
            f'{plugin_name} has dependencies of {", ".join(plugin.required_workers)}'
        )
        # Copy the chain so sibling branches don't see each other's path
        plugin_dependency_chain = init_plugin_dependency_chain.copy()
        plugin_dependency_chain.add(plugin_name)
        for required_plugin in plugin.required_workers:
            (
                required_plugin_can_run,
                required_plugin_deferred,
            ) = self._resolve_plugin_dependencies(
                payload,
                required_plugin,
                request,
                plugin_dependency_chain,
                depth + 1,
            )
            can_run.update(required_plugin_can_run)
            deferred.update(required_plugin_deferred)
    return can_run, deferred
async def scan_request(
    self, request: Request, add_start_dispatch: Optional[List[str]] = None
) -> StoqResponse:
    """
    Scan an individual payload

    Runs worker rounds up to ``self.max_recursion``, deduplicating
    extracted payloads by SHA-256, then archives payloads, applies
    decorators, and saves the response with all loaded connectors.

    :param request: ``Request`` object of payload(s) to be scanned
    :param add_start_dispatch: Force first round of scanning to use specified plugins
    """
    self.log.debug(
        f'Request received: RequestMeta: {helpers.dumps(request.request_meta, indent=0)}, '
        f'start_dispatches: {helpers.dumps(add_start_dispatch, indent=0)}'
    )
    add_dispatches: Set[Tuple[Payload, str]] = set()
    # Maps payload content sha256 -> indices into request.payloads,
    # used to dedupe extracted payloads across rounds
    hashes_seen: DefaultDict[str, List] = defaultdict(list)
    for idx, payload in enumerate(request.payloads):
        if payload.results.payload_meta.should_scan and add_start_dispatch:
            for plugin_name in add_start_dispatch:
                add_dispatches.add((payload, plugin_name))
        sha = helpers.get_sha256(payload.content)
        hashes_seen[sha].append(idx)
    for _recursion_level in range(1, self.max_recursion + 1):
        self.log.debug(f'Beginning worker round {_recursion_level}')
        scan_result = await self._execute_scan_round(request, add_dispatches)
        if scan_result is None:
            # No dispatches left; scanning is complete
            self.log.debug('No more plugins to run, completing scan')
            break
        extracted_payloads, add_dispatches = scan_result
        # TODO: Add option for no-dedup
        for extracted_payload in extracted_payloads:
            payload_hash = helpers.get_sha256(extracted_payload.content)
            if payload_hash not in hashes_seen:
                # New content: add it to the request for future rounds
                self.log.debug(
                    f'Extracted payload {extracted_payload.results.payload_id} with '
                    f'PayloadMeta: {extracted_payload.results.payload_meta}'
                )
                request.payloads.append(extracted_payload)
                hashes_seen[payload_hash].append(len(request.payloads) - 1)
                payload_meta = extracted_payload.results.payload_meta
                if _recursion_level >= self.max_recursion:
                    # No rounds remain to actually scan this payload
                    request.errors.append(
                        Error(
                            error=f'Final worker round ({_recursion_level}) reached, unable to process payload',
                            payload_id=extracted_payload.results.payload_id,
                        )
                    )
                elif payload_meta.should_scan and payload_meta.dispatch_to:
                    add_dispatches.update(
                        (extracted_payload, add_dispatch)
                        for add_dispatch in payload_meta.dispatch_to
                    )
            else:
                # Duplicate content: merge provenance into the payloads
                # already tracked under this hash instead of re-adding it
                payload_idx = hashes_seen[payload_hash]
                for idx in payload_idx:
                    request.payloads[idx].results.extracted_by.extend(
                        extracted_payload.results.extracted_by
                    )
                    request.payloads[idx].results.extracted_from.extend(
                        extracted_payload.results.extracted_from
                    )
    # Archive eligible payloads concurrently with all destination archivers
    archive_tasks: List = []
    if request.request_meta.archive_payloads:
        for payload in request.payloads:
            if not payload.results.payload_meta.should_archive:
                continue
            for archiver in self._loaded_dest_archiver_plugins.values():
                archive_tasks.append(
                    self._apply_archiver(archiver, payload, request)
                )
    await asyncio.gather(*archive_tasks)
    response = StoqResponse(request=request)
    # Decorators mutate the response in place before it is saved
    decorator_tasks = []
    for decorator in self._loaded_decorator_plugins.values():
        decorator_tasks.append(self._apply_decorator(decorator, response))
    await asyncio.gather(*decorator_tasks)
    connector_tasks = []
    for connector in self._loaded_connector_plugins:
        connector_tasks.append(self._apply_connector(connector, response))
    await asyncio.gather(*connector_tasks)
    return response