async def _apply_dispatcher(
    self, dispatcher: DispatcherPlugin, payload: Payload, request: Request
) -> Set[str]:
    """Run a single dispatcher plugin against a payload.

    Returns the set of worker plugin names the dispatcher selected. A
    dispatcher exception is logged and recorded on the request, and an
    empty set is returned in that case.
    """
    self.log.debug(
        f'Sending {payload.results.payload_id} to dispatcher ({dispatcher.plugin_name})'
    )
    selected: Set[str] = set()
    try:
        result = await dispatcher.get_dispatches(payload, request)
    except Exception as e:
        msg = 'dispatcher:failed to dispatch'
        self.log.exception(msg)
        request.errors.append(
            Error(
                plugin_name=dispatcher.plugin_name,
                error=helpers.format_exc(e, msg=msg),
                payload_id=payload.results.payload_id,
            )
        )
        return selected
    if result:
        if result.plugin_names is not None:
            selected.update(result.plugin_names)
            self.log.debug(
                f'Dispatching {payload.results.payload_id} to {selected}'
            )
        # Stash any dispatcher-provided metadata on the payload for later use
        if result.meta is not None:
            payload.dispatch_meta[dispatcher.plugin_name] = result.meta
    return selected
async def _apply_archiver(
    self, archiver: ArchiverPlugin, payload: Payload, request: Request
) -> None:
    """Archive a payload with one destination archiver plugin.

    The plugin is always recorded in ``plugins_run['archivers']``; archiver
    exceptions are logged and appended to the request errors rather than
    raised.
    """
    self.log.debug(
        f'Archiving {payload.results.payload_id} with {archiver.plugin_name}'
    )
    response: Optional[ArchiverResponse] = None
    try:
        response = await archiver.archive(payload, request)
    except Exception as e:
        msg = 'archiver:failed to archive'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=archiver.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
    payload.results.plugins_run['archivers'].append(archiver.plugin_name)
    if not response:
        return
    if response.errors is not None:
        request.errors.extend(response.errors)
    if response.results is not None:
        payload.results.archivers[archiver.plugin_name] = response.results
def _get_deep_dispatches(
    self,
    payload: Payload,
    add_deep_dispatches: List[str],
    request_meta: RequestMeta,
) -> Tuple[Set[str], DefaultDict[str, List[str]]]:
    """Collect deep-dispatch plugin names from all loaded deep dispatchers.

    Seeds the result with ``add_deep_dispatches``, then asks every loaded
    deep dispatcher for additional plugin names. Per-dispatcher failures
    are logged and returned in the errors mapping instead of raised.
    """
    failures: DefaultDict[str, List[str]] = defaultdict(list)
    names: Set[str] = set(add_deep_dispatches)
    for dispatcher_name, dispatcher in self._loaded_deep_dispatcher_plugins.items():
        try:
            result = dispatcher.get_deep_dispatches(payload, request_meta)
            names.update(result.plugin_names)
            # Record dispatcher-supplied metadata on the payload
            if result.meta is not None:
                payload.deep_dispatch_meta[dispatcher_name] = result.meta
        except Exception as e:
            msg = 'deep dispatcher:failed to deep dispatch'
            self.log.exception(msg)
            failures[dispatcher_name].append(helpers.format_exc(e, msg=msg))
    return (names, failures)
def _resolve_dependencies(
    self, total_dispatches: Set[Tuple[Payload, str]], request: Request
) -> Tuple[Set[Tuple[Payload, WorkerPlugin]], Set[Tuple[Payload, str]]]:
    """Resolve required-worker dependencies for each dispatched pair.

    Splits every ``(payload, plugin_name)`` dispatch into plugins that can
    run now and ones that must be deferred. ``RecursionError`` (a subclass
    of ``RuntimeError``) raised for circular/too-deep chains is recorded on
    the request and that dispatch is skipped.
    """
    runnable: Set[Tuple[Payload, WorkerPlugin]] = set()
    waiting: Set[Tuple[Payload, str]] = set()
    for payload, plugin_name in total_dispatches:
        try:
            can_run, deferred = self._resolve_plugin_dependencies(
                payload, plugin_name, request, set()
            )
        except RuntimeError as e:
            self.log.exception(e)
            request.errors.append(
                Error(
                    payload_id=payload.results.payload_id,
                    plugin_name=plugin_name,
                    error=helpers.format_exc(e),
                )
            )
            continue
        runnable |= can_run
        waiting |= deferred
    return runnable, waiting
async def _apply_connector(
    self, connector: ConnectorPlugin, response: StoqResponse
) -> None:
    """Persist scan results with a single connector plugin.

    Failures are logged and recorded on the response rather than raised,
    so one misbehaving connector cannot prevent other connectors from
    saving the same response.
    """
    self.log.debug(f'Saving results to connector {connector.plugin_name}')
    try:
        await connector.save(response)
    except Exception as e:
        msg = f'Failed to save results using {connector.__module__}'
        self.log.exception(msg)
        error = Error(
            plugin_name=connector.plugin_name,
            error=helpers.format_exc(e, msg=msg),
        )
        # BUG FIX: the constructed Error was previously discarded, so save
        # failures were invisible to callers. Record it on the response,
        # matching how _apply_decorator reports its failures.
        response.errors.append(error)
async def _apply_worker(
    self, payload: Payload, plugin: WorkerPlugin, request: Request
) -> Tuple[Set[Tuple[Payload, str]], List[Payload]]:
    """Scan one payload with one worker plugin.

    Records the run (and any results/errors) on the payload and request,
    then returns the additional ``(payload, plugin_name)`` dispatches the
    worker requested and any payloads it extracted. A scan exception is
    recorded on the request and yields an empty result.
    """
    self.log.debug(
        f'Scanning Payload {payload.results.payload_id} with WorkerPlugin {plugin.plugin_name}'
    )
    worker_response: Optional[WorkerResponse]
    try:
        worker_response = await plugin.scan(payload, request)
    except Exception as e:
        worker_response = None
        msg = 'worker:failed to scan'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=plugin.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
    # The plugin counts as "run" whether or not the scan succeeded
    payload.results.plugins_run['workers'].append(plugin.plugin_name)
    if not worker_response:
        return set(), []
    if worker_response.results is not None:
        payload.results.workers[plugin.plugin_name] = worker_response.results
    request.errors.extend(worker_response.errors)
    extra_dispatches: Set[Tuple[Payload, str]] = set()
    for dispatch_name in worker_response.dispatch_to:
        extra_dispatches.add((payload, dispatch_name))
    children: List[Payload] = []
    for child in worker_response.extracted:
        children.append(
            Payload(
                content=child.content,
                payload_meta=child.payload_meta,
                extracted_by=plugin.plugin_name,
                extracted_from=payload.results.payload_id,
            )
        )
    result_key_count = (
        len(worker_response.results) if worker_response.results else 0  # type: ignore
    )
    self.log.debug(
        f'Completed scan of {payload.results.payload_id} with '
        f'{result_key_count} result keys, '
        f'{len(extra_dispatches)} additional dispatches, and '
        f'{len(children)} extracted payloads'
    )
    return extra_dispatches, children
def _apply_decorators(self, response: StoqResponse) -> None:
    """Mutates the given StoqResponse object to include decorator information"""
    for name, plugin in self._loaded_decorator_plugins.items():
        try:
            result = plugin.decorate(response)
        except Exception as e:
            self.log.exception('decorator')
            response.errors[name].append(
                helpers.format_exc(e, msg='decorator')
            )
            continue
        if result is None:
            continue
        if result.results is not None:
            response.decorators[name] = result.results
        if result.errors:
            response.errors[name].extend(result.errors)
async def _apply_decorator(
    self, decorator: DecoratorPlugin, response: StoqResponse
) -> StoqResponse:
    """Mutates the given StoqResponse object to include decorator information"""
    self.log.debug(f'Applying decorator {decorator.plugin_name}')
    try:
        result = await decorator.decorate(response)
    except Exception as e:
        msg = 'decorator'
        self.log.exception(msg)
        response.errors.append(
            Error(
                plugin_name=decorator.plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
        return response
    # A decorator may return nothing; only merge when it produced output
    if result is not None:
        if result.results is not None:
            response.decorators[decorator.plugin_name] = result.results
        if result.errors:
            response.errors.extend(result.errors)
    return response
def _resolve_plugin_dependencies(
    self,
    payload: Payload,
    plugin_name: str,
    request: Request,
    init_plugin_dependency_chain: Set[str],
    depth: int = 0,
) -> Tuple[Set[Tuple[Payload, WorkerPlugin]], Set[Tuple[Payload, str]]]:
    """Recursively resolve a worker plugin and its required workers.

    Returns ``(can_run, deferred)`` pairs for the plugin and everything in
    its ``required_workers`` chain. Raises ``RecursionError`` on a circular
    dependency or when ``max_required_worker_depth`` is exceeded; a plugin
    load failure is recorded on the request and yields empty sets.
    """
    # Cycle guard must come before anything else so a circular chain is
    # reported rather than looping until the depth limit
    if plugin_name in init_plugin_dependency_chain:
        raise RecursionError(
            'Circular required plugin dependency found, '
            f'unable to process plugin {plugin_name}'
        )
    if depth > self.max_required_worker_depth:
        raise RecursionError(
            f'Max required plugin depth {self.max_required_worker_depth} reached, '
            'unable to generate additional tasks'
        )
    try:
        plugin: WorkerPlugin = self.load_plugin(plugin_name)  # type: ignore
    except Exception as e:
        msg = f'Worker plugin {plugin_name} failed to load'
        self.log.exception(msg)
        request.errors.append(
            Error(
                payload_id=payload.results.payload_id,
                plugin_name=plugin_name,
                error=helpers.format_exc(e, msg=msg),
            )
        )
        return set(), set()
    # Already scanned this payload with this plugin; nothing more to do
    if plugin_name in payload.results.plugins_run['workers']:
        return set(), set()
    runnable: Set[Tuple[Payload, WorkerPlugin]] = set()
    deferred: Set[Tuple[Payload, str]] = set()
    if self._plugin_can_run(payload, plugin):
        runnable.add((payload, plugin))
    else:
        deferred.add((payload, plugin_name))
    if plugin.required_workers:
        self.log.debug(
            f'{plugin_name} has dependencies of {", ".join(plugin.required_workers)}'
        )
        # Extend the chain with this plugin before descending so cycles
        # through it are detected
        chain = init_plugin_dependency_chain | {plugin_name}
        for required in plugin.required_workers:
            dep_runnable, dep_deferred = self._resolve_plugin_dependencies(
                payload, required, request, chain, depth + 1
            )
            runnable |= dep_runnable
            deferred |= dep_deferred
    return runnable, deferred
def _single_scan(
    self,
    payload: Payload,
    add_dispatch: List[str],
    add_deep_dispatch: List[str],
    request_meta: RequestMeta,
) -> Tuple[PayloadResults, List[Payload], DefaultDict[str, List[str]]]:
    """Scan a single payload through normal dispatch, deep-dispatch passes,
    and destination archivers.

    :param payload: Payload to scan
    :param add_dispatch: Extra plugin names forced into the first round
    :param add_deep_dispatch: Extra plugin names forced into deep-dispatch rounds
    :param request_meta: Metadata for the originating request

    :return: Tuple of (results for this payload, payloads extracted by
             workers, per-plugin error messages)
    """
    extracted = []
    errors: DefaultDict[str, List[str]] = defaultdict(list)
    dispatch_pass = 0
    # Round 1: ask the regular dispatchers which workers should run
    dispatches, dispatch_errors = self._get_dispatches(
        payload, add_dispatch, request_meta)
    if dispatch_errors:
        errors = helpers.merge_dicts(errors, dispatch_errors)
    for plugin_name in dispatches:
        try:
            plugin = self.load_plugin(plugin_name)
        except Exception as e:
            msg = 'worker:failed to load'
            self.log.exception(msg)
            errors[plugin_name].append(helpers.format_exc(e, msg=msg))
            continue
        # Normal dispatches are the "1st round" of scanning
        payload.plugins_run['workers'][0].append(plugin_name)
        try:
            worker_response = plugin.scan(payload, request_meta)  # pyre-ignore[16]
        except Exception as e:
            msg = 'worker:failed to scan'
            self.log.exception(msg)
            errors[plugin_name].append(helpers.format_exc(e, msg=msg))
            continue
        if worker_response is None:
            continue
        if worker_response.results is not None:
            # Normal dispatches are the "1st round" of scanning
            payload.worker_results[0][
                plugin_name] = worker_response.results
        # Collect payloads the worker extracted so the caller can queue them
        extracted.extend([
            Payload(ex.content, ex.payload_meta, plugin_name,
                    payload.payload_id)
            for ex in worker_response.extracted
        ])
        if worker_response.errors:
            errors[plugin_name].extend(worker_response.errors)
    # Deep-dispatch rounds: repeat until no deep dispatcher wants another
    # pass, or max_dispatch_passes is reached
    while dispatch_pass < self.max_dispatch_passes:
        dispatch_pass += 1
        deep_dispatches, deep_dispatch_errors = self._get_deep_dispatches(
            payload, add_deep_dispatch, request_meta)
        if deep_dispatch_errors:
            errors = helpers.merge_dicts(errors, deep_dispatch_errors)
        if deep_dispatches:
            # Add another entry for this round
            payload.plugins_run['workers'].append([])
            payload.worker_results.append({})
        else:
            break
        for plugin_name in deep_dispatches:
            try:
                plugin = self.load_plugin(plugin_name)
            except Exception as e:
                msg = f'deep dispatch:failed to load (pass {dispatch_pass}/{self.max_dispatch_passes})'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            # Results for this round are indexed by dispatch_pass
            payload.plugins_run['workers'][dispatch_pass].append(
                plugin_name)
            try:
                worker_response = plugin.scan(  # pyre-ignore[16]
                    payload, request_meta)
            except Exception as e:
                msg = f'deep dispatch:failed to scan (pass {dispatch_pass}/{self.max_dispatch_passes})'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            if worker_response is None:
                continue
            if worker_response.results is not None:
                payload.worker_results[dispatch_pass][
                    plugin_name] = worker_response.results
            extracted.extend([
                Payload(ex.content, ex.payload_meta, plugin_name,
                        payload.payload_id)
                for ex in worker_response.extracted
            ])
            if worker_response.errors:
                errors[plugin_name].extend(worker_response.errors)
    # Snapshot the payload state into its results object before archiving
    payload_results = PayloadResults.from_payload(payload)
    if request_meta.archive_payloads and payload.payload_meta.should_archive:
        for plugin_name, archiver in self._loaded_dest_archiver_plugins.items(
        ):
            payload.plugins_run['archivers'].append(plugin_name)
            try:
                archiver_response = archiver.archive(payload, request_meta)
            except Exception as e:
                msg = 'archiver:failed to archive'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            if archiver_response is None:
                continue
            if archiver_response.results is not None:
                payload_results.archivers[
                    plugin_name] = archiver_response.results
            if archiver_response.errors:
                errors[plugin_name].extend(archiver_response.errors)
    return (payload_results, extracted, errors)
def scan_payload(
    self,
    payload: Payload,
    request_meta: Optional[RequestMeta] = None,
    add_start_dispatch: Optional[List[str]] = None,
    add_start_deep_dispatch: Optional[List[str]] = None,
) -> StoqResponse:
    """
    Scan an individual payload

    :param payload: ``Payload`` object of data to be scanned
    :param request_meta: Metadata pertaining to the originating request
    :param add_start_dispatch: Force first round of scanning to use specified plugins
    :param add_start_deep_dispatch: Force second round of scanning to use specified plugins

    :return: Complete scan results
    :rtype: StoqResponse
    """
    request_meta = RequestMeta() if request_meta is None else request_meta
    add_start_dispatch = [] if add_start_dispatch is None else add_start_dispatch
    add_start_deep_dispatch = (
        [] if add_start_deep_dispatch is None else add_start_deep_dispatch
    )
    scan_results: List = []
    errors: DefaultDict[str, List[str]] = defaultdict(list)
    scan_queue = [(payload, add_start_dispatch, add_start_deep_dispatch)]
    # BUG FIX: set(<hex digest>) built a set of the digest's individual
    # characters, so an extracted payload identical to the root payload was
    # never deduplicated. Use a set literal containing the whole digest.
    hashes_seen: Set[str] = {helpers.get_sha256(payload.content)}
    # Breadth-first scan: each recursion level processes the payloads
    # extracted by the previous one, up to max_recursion levels deep
    for _recursion_level in range(self.max_recursion + 1):
        next_scan_queue: List[Tuple[Payload, List[str], List[str]]] = []
        for payload, add_dispatch, add_deep_dispatch in scan_queue:
            payload_results, extracted, p_errors = self._single_scan(
                payload, add_dispatch, add_deep_dispatch, request_meta)
            scan_results.append(payload_results)
            # TODO: Add option for no-dedup
            for ex in extracted:
                ex_hash = helpers.get_sha256(ex.content)
                if ex_hash not in hashes_seen:
                    hashes_seen.add(ex_hash)
                    next_scan_queue.append(
                        (ex, ex.payload_meta.dispatch_to, []))
            errors = helpers.merge_dicts(errors, p_errors)
        scan_queue = next_scan_queue
    response = StoqResponse(
        results=scan_results, request_meta=request_meta, errors=errors)
    # Decorate the aggregate response; decorator failures are recorded,
    # never raised
    for plugin_name, decorator in self._loaded_decorator_plugins.items():
        try:
            decorator_response = decorator.decorate(response)
        except Exception as e:
            msg = 'decorator'
            self.log.exception(msg)
            response.errors[plugin_name].append(
                helpers.format_exc(e, msg='decorator'))
            continue
        if decorator_response is None:
            continue
        if decorator_response.results is not None:
            response.decorators[plugin_name] = decorator_response.results
        if decorator_response.errors:
            response.errors[plugin_name].extend(decorator_response.errors)
    # Persist the final response through every loaded connector
    for connector in self._loaded_connector_plugins:
        connector.save(response)
    return response