def scan_payload(
    self,
    payload: Payload,
    request_meta: Optional[RequestMeta] = None,
    add_start_dispatch: Optional[List[str]] = None,
    add_start_deep_dispatch: Optional[List[str]] = None,
) -> StoqResponse:
    """
    Scan an individual payload

    :param payload: ``Payload`` object of data to be scanned
    :param request_meta: Metadata pertaining to the originating request
    :param add_start_dispatch: Force first round of scanning to use specified plugins
    :param add_start_deep_dispatch: Force second round of scanning to use specified plugins

    :return: Complete scan results
    :rtype: StoqResponse

    """
    # Normalize optional arguments; never use mutable defaults in the signature.
    request_meta = RequestMeta() if request_meta is None else request_meta
    add_start_dispatch = [] if add_start_dispatch is None else add_start_dispatch
    add_start_deep_dispatch = (
        [] if add_start_deep_dispatch is None else add_start_deep_dispatch
    )
    scan_results: List = []
    errors: DefaultDict[str, List[str]] = defaultdict(list)
    scan_queue = [(payload, add_start_dispatch, add_start_deep_dispatch)]
    # BUGFIX: the original code used set(helpers.get_sha256(...)), which builds
    # a set of the *characters* of the hex digest rather than a set containing
    # the digest itself, so the root payload was never registered for dedup and
    # an extracted payload identical to the root would be scanned again. A set
    # literal stores the digest as a single element.
    hashes_seen: Set[str] = {helpers.get_sha256(payload.content)}
    # Breadth-first scan: each recursion level scans the payloads extracted by
    # the previous level, up to self.max_recursion levels deep.
    for _recursion_level in range(self.max_recursion + 1):
        next_scan_queue: List[Tuple[Payload, List[str], List[str]]] = []
        for payload, add_dispatch, add_deep_dispatch in scan_queue:
            payload_results, extracted, p_errors = self._single_scan(
                payload, add_dispatch, add_deep_dispatch, request_meta)
            scan_results.append(payload_results)
            # TODO: Add option for no-dedup
            for ex in extracted:
                # Deduplicate extracted payloads by content hash so identical
                # content is only scanned once per request.
                ex_hash = helpers.get_sha256(ex.content)
                if ex_hash not in hashes_seen:
                    hashes_seen.add(ex_hash)
                    # Extracted payloads only get a first-round dispatch list;
                    # deep dispatches apply to the originating payload only.
                    next_scan_queue.append(
                        (ex, ex.payload_meta.dispatch_to, []))
            errors = helpers.merge_dicts(errors, p_errors)
        scan_queue = next_scan_queue
    response = StoqResponse(
        results=scan_results, request_meta=request_meta, errors=errors)
    self._apply_decorators(response)
    # Saving results is best-effort per connector: one failing connector must
    # not prevent the others from receiving the response.
    for connector in self._loaded_connector_plugins:
        try:
            connector.save(response)
        except Exception:
            self.log.exception(
                f'Failed to save results using {connector.__module__}: {response}'
            )
    return response
def test_dict_merge(self):
    """merge_dicts should concatenate the list values of both dicts, key by key."""
    left = defaultdict(list)
    right = defaultdict(list)
    # Populate both inputs from one table so the pairing per key is explicit.
    for key, left_value, right_value in (
        ('testkey', 'test value', 'merged test value'),
        ('anotherkey', 'another value', 'merged another value'),
    ):
        left[key].append(left_value)
        right[key].append(right_value)
    expected = {
        'testkey': ['test value', 'merged test value'],
        'anotherkey': ['another value', 'merged another value'],
    }
    self.assertEqual(helpers.merge_dicts(left, right), expected)
def _single_scan(
    self,
    payload: Payload,
    add_dispatch: List[str],
    add_deep_dispatch: List[str],
    request_meta: RequestMeta,
) -> Tuple[PayloadResults, List[Payload], DefaultDict[str, List[str]]]:
    """
    Run every applicable plugin against a single payload.

    Three sequential phases:
      1. normal dispatch  -- first round of worker plugins
      2. deep dispatch    -- up to ``self.max_dispatch_passes`` additional
                             rounds, driven by results of earlier rounds
      3. archiving        -- destination archivers, when requested

    :param payload: Payload to scan; its ``plugins_run`` and
        ``worker_results`` bookkeeping are mutated in place, one entry per
        dispatch round.
    :param add_dispatch: Extra plugins to force into the first round
    :param add_deep_dispatch: Extra plugins to force into deep-dispatch rounds
    :param request_meta: Metadata of the originating request

    :return: Tuple of (results for this payload, payloads extracted by
        workers, per-plugin error messages)
    """
    extracted = []
    errors: DefaultDict[str, List[str]] = defaultdict(list)
    # Index into payload.plugins_run['workers'] / payload.worker_results;
    # 0 is the normal-dispatch round, each deep-dispatch pass increments it.
    dispatch_pass = 0
    dispatches, dispatch_errors = self._get_dispatches(
        payload, add_dispatch, request_meta)
    if dispatch_errors:
        errors = helpers.merge_dicts(errors, dispatch_errors)
    # --- Phase 1: normal dispatches ------------------------------------
    for plugin_name in dispatches:
        try:
            plugin = self.load_plugin(plugin_name)
        except Exception as e:
            # A plugin that fails to load is recorded and skipped; the
            # remaining plugins still run.
            msg = 'worker:failed to load'
            self.log.exception(msg)
            errors[plugin_name].append(helpers.format_exc(e, msg=msg))
            continue
        # Normal dispatches are the "1st round" of scanning
        payload.plugins_run['workers'][0].append(plugin_name)
        try:
            worker_response = plugin.scan(payload, request_meta)  # pyre-ignore[16]
        except Exception as e:
            # A plugin that raises during scan is likewise recorded and skipped.
            msg = 'worker:failed to scan'
            self.log.exception(msg)
            errors[plugin_name].append(helpers.format_exc(e, msg=msg))
            continue
        if worker_response is None:
            continue
        if worker_response.results is not None:
            # Normal dispatches are the "1st round" of scanning
            payload.worker_results[0][
                plugin_name] = worker_response.results
        # Wrap extracted content in new Payloads linked to this parent.
        extracted.extend([
            Payload(ex.content, ex.payload_meta, plugin_name,
                    payload.payload_id)
            for ex in worker_response.extracted
        ])
        if worker_response.errors:
            errors[plugin_name].extend(worker_response.errors)
    # --- Phase 2: deep dispatches (bounded number of extra rounds) -----
    while dispatch_pass < self.max_dispatch_passes:
        dispatch_pass += 1
        deep_dispatches, deep_dispatch_errors = self._get_deep_dispatches(
            payload, add_deep_dispatch, request_meta)
        if deep_dispatch_errors:
            errors = helpers.merge_dicts(errors, deep_dispatch_errors)
        if deep_dispatches:
            # Add another entry for this round
            payload.plugins_run['workers'].append([])
            payload.worker_results.append({})
        else:
            # No more plugins want to run; stop before max_dispatch_passes.
            break
        for plugin_name in deep_dispatches:
            try:
                plugin = self.load_plugin(plugin_name)
            except Exception as e:
                msg = f'deep dispatch:failed to load (pass {dispatch_pass}/{self.max_dispatch_passes})'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            # Record this plugin under the current deep-dispatch round.
            payload.plugins_run['workers'][dispatch_pass].append(
                plugin_name)
            try:
                worker_response = plugin.scan(  # pyre-ignore[16]
                    payload, request_meta)
            except Exception as e:
                msg = f'deep dispatch:failed to scan (pass {dispatch_pass}/{self.max_dispatch_passes})'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            if worker_response is None:
                continue
            if worker_response.results is not None:
                payload.worker_results[dispatch_pass][
                    plugin_name] = worker_response.results
            extracted.extend([
                Payload(ex.content, ex.payload_meta, plugin_name,
                        payload.payload_id)
                for ex in worker_response.extracted
            ])
            if worker_response.errors:
                errors[plugin_name].extend(worker_response.errors)
    # Snapshot the accumulated per-payload bookkeeping into the result object.
    payload_results = PayloadResults.from_payload(payload)
    # --- Phase 3: archiving (only when both request and payload allow it) --
    if request_meta.archive_payloads and payload.payload_meta.should_archive:
        for plugin_name, archiver in self._loaded_dest_archiver_plugins.items(
        ):
            payload.plugins_run['archivers'].append(plugin_name)
            try:
                archiver_response = archiver.archive(payload, request_meta)
            except Exception as e:
                # Archiving is best-effort per archiver; failures are recorded
                # and the remaining archivers still run.
                msg = 'archiver:failed to archive'
                self.log.exception(msg)
                errors[plugin_name].append(helpers.format_exc(e, msg=msg))
                continue
            if archiver_response is None:
                continue
            if archiver_response.results is not None:
                payload_results.archivers[
                    plugin_name] = archiver_response.results
            if archiver_response.errors:
                errors[plugin_name].extend(archiver_response.errors)
    return (payload_results, extracted, errors)