def test_dont_dest_archive_payload(self):
    s = Stoq(base_dir=utils.get_data_dir(), dest_archivers=['dummy_archiver'])
    dummy_archiver = s.load_plugin('dummy_archiver')
    dummy_archiver.archive = create_autospec(
        dummy_archiver.archive, return_value=None
    )
    response = s.scan(
        self.generic_content,
        payload_meta=PayloadMeta(should_archive=False),
        add_start_dispatch=['extract_random'],
        request_meta=RequestMeta(archive_payloads=True),
    )
    # The root payload opts out of archiving via should_archive=False, so the
    # archiver should run exactly once, for the extracted payload only
    dummy_archiver.archive.assert_called_once()
    self.assertNotIn('dummy_archiver', response.results[0].plugins_run['archivers'])
    self.assertIn('dummy_archiver', response.results[1].plugins_run['archivers'])
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    extracted = []
    tnef_results = TNEF(payload.content)
    if tnef_results.attachments:
        for tnef_attachment in tnef_results.attachments:
            try:
                filename = UnicodeDammit(tnef_attachment.name).unicode_markup
            except Exception:
                filename = "None"
            tnef_meta = PayloadMeta(extra_data={'filename': filename})
            attachment = ExtractedPayload(tnef_attachment.data, tnef_meta)
            # append, not extend: attachment is a single ExtractedPayload,
            # not an iterable of payloads
            extracted.append(attachment)
    return WorkerResponse({}, extracted=extracted)
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    extracted = []
    errors = []
    try:
        parsed_xml = parseString(payload.content)
    except ExpatError as err:
        errors.append(f'Unable to parse payload as XML with xdpcarve: {err}')
        return WorkerResponse(errors=errors)
    for name in self.elements:
        dom_element = parsed_xml.getElementsByTagName(name)
        for dom in dom_element:
            # Empty elements have no firstChild; skip them rather than
            # raising an AttributeError on .nodeValue
            if dom.firstChild is None:
                continue
            content = dom.firstChild.nodeValue
            content = content.rstrip()
            try:
                content = base64.b64decode(content)
            except Exception:
                pass
            meta = PayloadMeta(extra_data={"element_name": name})
            extracted.append(ExtractedPayload(content, meta))
    return WorkerResponse(extracted=extracted, errors=errors)
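# Note: base64.b64decode() without validate=True discards characters outside
# the base64 alphabet before decoding, so element text that merely looks
# base64-ish can be silently "decoded" into garbage instead of raising and
# being passed through unchanged. A stricter alternative, sketched here as an
# assumption rather than the plugin's original behavior:

import base64
import binascii

def strict_b64decode(content: str) -> bytes:
    """Decode only strictly valid base64; otherwise return the raw bytes."""
    try:
        return base64.b64decode(content, validate=True)
    except (binascii.Error, ValueError):
        return content.encode()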
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    """
    Carve PE files from provided payload

    """
    extracted = []
    content = BytesIO(payload.content)
    content.seek(0)
    for start, end in self._carve(content):
        content.seek(start)
        try:
            pe = pefile.PE(data=content.read())
        except Exception:
            # Not a parseable PE at this offset; move on to the next candidate
            continue
        meta = PayloadMeta(extra_data={'offset': start})
        extracted.append(ExtractedPayload(pe.trim(), meta))
        content.seek(0)
        pe.close()
    return WorkerResponse(extracted=extracted)
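# The _carve() helper is not shown above. A minimal sketch of what it might
# look like (hypothetical, not necessarily the plugin's implementation): scan
# for 'MZ' magic bytes and let pefile.PE() in the loop above do the real
# validation of each candidate offset.

import re
from io import BytesIO
from typing import Iterator, Tuple

def _carve(content: BytesIO) -> Iterator[Tuple[int, int]]:
    """Yield (start, end) offsets of possible DOS/PE headers."""
    data = content.read()
    for match in re.finditer(b'MZ', data):
        # We only report where an 'MZ' magic appears; the caller attempts a
        # full PE parse from each start offset to the end of the buffer
        yield match.start(), len(data)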
async def ingest(self, queue: Queue) -> None:
    """
    Monitor a directory for newly created files for ingest

    """
    self.log.info(f'Monitoring {self.source_dir} for newly created files...')
    async for changes in awatch(self.source_dir):
        for change in list(changes):
            event = change[0]
            src_path = os.path.abspath(change[1])
            # Only handle Change.added
            if event != 1:
                continue
            meta = PayloadMeta(
                extra_data={
                    'filename': os.path.basename(src_path),
                    'source_dir': os.path.dirname(src_path),
                }
            )
            with open(src_path, 'rb') as f:
                payload = Payload(f.read(), meta)
            await queue.put(payload)
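# The magic number 1 above corresponds to Change.added in watchgod (the
# package that provides awatch(), where Change is an IntEnum with added=1,
# modified=2, deleted=3). Comparing against the enum directly reads better
# and survives library changes; a small sketch assuming that package:

from watchgod import Change

def is_new_file(event: int) -> bool:
    """True only for newly created files, not modifications or deletions."""
    return event == Change.added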
def decompress(self, content: BytesIO, offset: int = 0):
    """
    Extract and decompress an SWF object

    """
    errors = []
    meta = None
    swf = None
    extracted = None
    try:
        """
        Header as obtained from SWF File Specification:

        Field      Type  Comment
        Signature  UI8   Signature byte:
                         - "F" indicates uncompressed
                         - "C" indicates a zlib compressed SWF (SWF 6 and later only)
                         - "Z" indicates a LZMA compressed SWF (SWF 13 and later only)
        Signature  UI8   Signature byte always "W"
        Signature  UI8   Signature byte always "S"
        Version    UI8   Single byte file version (for example, 0x06 for SWF 6)
        FileLength UI32  Length of entire file in bytes
        """
        # Jump to the proper offset
        content.seek(offset)
        # Grab the first three bytes, should be FWS, CWS or ZWS
        magic = content.read(3).decode()
        # Grab the SWF version - 1 byte
        swf_version = struct.unpack('<b', content.read(1))[0]
        # Grab next 4 bytes so we can unpack to calculate the uncompressed
        # size of the payload
        decompressed_size = struct.unpack("<i", content.read(4))[0] - 8
        # Let's go back to the offset byte, jumping beyond the SWF header
        content.seek(offset + 3)
        # Make sure our header is that of a decompressed SWF plus the
        # original version and size headers
        composite_header = b'FWS' + content.read(5)
        # Determine the compression type, ZLIB or LZMA, then decompress the
        # payload size minus 8 bytes of original header
        try:
            if magic == "ZWS":
                # Seek relative to the carve offset, consistent with the
                # other seeks above
                content.seek(offset + 12)
                content = pylzma.decompress(content.read(decompressed_size))
            elif magic == "CWS":
                content = zlib.decompress(content.read(decompressed_size))
            elif magic == 'FWS':
                # Not compressed, but let's return the payload based on the
                # size defined in the header
                content = content.read(decompressed_size)
            else:
                return None, errors
        except Exception:
            return None, errors
        if len(content) != decompressed_size:
            errors.append(
                f'Invalid size of carved SWF content: {len(content)} != {decompressed_size}'
            )
        else:
            swf = composite_header + content
            meta = PayloadMeta(
                extra_data={'offset': offset, 'swf_version': swf_version}
            )
            extracted = ExtractedPayload(swf, meta)
    except Exception:
        errors.append(f'Unable to decompress SWF payload at offset {offset}')
    return extracted, errors
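# A quick way to exercise the CWS (zlib) branch above is to synthesize a
# compressed SWF from any uncompressed body. A minimal sketch, intended as
# test scaffolding rather than part of the plugin:

import struct
import zlib
from io import BytesIO

def make_cws(body: bytes, version: int = 6) -> bytes:
    """Build a zlib-compressed SWF: 'CWS' + version + total uncompressed size."""
    # FileLength counts the 8 header bytes plus the uncompressed body, which
    # is why decompress() subtracts 8 when computing decompressed_size
    header = b'CWS' + struct.pack('<b', version) + struct.pack('<i', len(body) + 8)
    return header + zlib.compress(body)

# decompress(BytesIO(make_cws(b'\x00' * 64)))  # should yield an FWS payload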
def test_reconstruct_all_subresponses(self):
    # Construct a fake stoq_response as if it were generated from a file
    # A.zip that contains two files, B.txt and C.zip, where C.zip contains D.txt
    initial_response = StoqResponse(
        results=[
            PayloadResults(
                payload_id="A.zip",
                size=0,
                payload_meta=PayloadMeta(),
                workers=[{"fake": "result1"}],
                plugins_run={"workers": [["fake"]]},
            ),
            PayloadResults(
                payload_id="B.txt",
                size=0,
                payload_meta=PayloadMeta(),
                workers=[{"fake": "result2"}],
                plugins_run={"workers": [["fake"]]},
                extracted_from="A.zip",
                extracted_by="fake",
            ),
            PayloadResults(
                payload_id="C.zip",
                size=0,
                payload_meta=PayloadMeta(),
                workers=[{"fake": "result3"}],
                plugins_run={"workers": [["fake"]]},
                extracted_from="A.zip",
                extracted_by="fake",
            ),
            PayloadResults(
                payload_id="D.txt",
                size=0,
                payload_meta=PayloadMeta(),
                workers=[{"fake": "result4"}],
                plugins_run={"workers": [["fake"]]},
                extracted_from="C.zip",
                extracted_by="fake",
            ),
        ],
        request_meta=RequestMeta(extra_data={"check": "me"}),
        errors={},
    )
    s = Stoq(base_dir=utils.get_data_dir(), decorators=["simple_decorator"])
    all_subresponses = list(s.reconstruct_all_subresponses(initial_response))
    # We expect there to be four "artificial" responses generated, one for
    # each payload as the root
    self.assertEqual(len(all_subresponses), 4)
    # We expect the first response to have all four payloads, the second
    # response to have just the second payload, the third response to have
    # the third and fourth payloads, and the fourth response to have just
    # the fourth payload
    self.assertEqual(
        [len(stoq_response.results) for stoq_response in all_subresponses],
        [4, 1, 2, 1],
    )
    self.assertEqual(
        [
            stoq_response.results[0].workers[0]["fake"]
            for stoq_response in all_subresponses
        ],
        ["result1", "result2", "result3", "result4"],
    )
    self.assertTrue(
        all(
            "simple_decorator" in stoq_response.decorators
            for stoq_response in all_subresponses
        )
    )
    # Assert that they all have the same scan ID
    self.assertEqual(
        len({stoq_response.scan_id for stoq_response in all_subresponses}), 1
    )
def test_payloadmeta_to_str(self):
    response = PayloadMeta()
    response_str = str(response)
    response_dict = json.loads(response_str)
    self.assertIsInstance(response_str, str)
    self.assertIsInstance(response_dict, dict)
def main() -> None:
    about = f'stoQ :: v{__version__} :: an automated analysis framework'

    # If $STOQ_HOME exists, set our base directory to that, otherwise
    # use $HOME/.stoq
    try:
        stoq_home = str(
            Path(os.getenv('STOQ_HOME', f'{str(Path.home())}/.stoq')).resolve(
                strict=True
            )
        )
    except FileNotFoundError as err:
        print(f'$STOQ_HOME is invalid, exiting: {err}', file=sys.stderr)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=about,
        epilog='''
Examples:

    - Scan a file with installed plugins and dispatch rules:

        $ %(prog)s scan mybadfile.exe

    - Scan a file and force it to go through the yara plugin:

        $ %(prog)s scan mybadfile.exe -s yara

    - Ingest from PubSub, force all payloads through yara, trid, and exif,
      then save results to file:

        $ %(prog)s run -a yara trid exif -P pubsub -C file

    - Monitor a directory (specified in dirmon.stoq) for newly created files,
      send them to workers, and archive all payloads into MongoDB:

        $ %(prog)s run -P dirmon -A mongodb

    - Install a plugin from a directory:

        $ %(prog)s install path/to/plugin_directory

''',
    )
    subparsers = parser.add_subparsers(title='commands', dest='command')
    subparsers.required = True

    scan = subparsers.add_parser('scan', help='Scan a given payload')
    scan.add_argument(
        'file',
        nargs='?',
        type=argparse.FileType('rb'),
        default=sys.stdin.buffer,
        help='File to scan, can also be provided from stdin',
    )

    run = subparsers.add_parser(
        'run', help='Continually ingest and scan payloads from Provider plugins'
    )
    run.add_argument(
        '-P', '--providers', nargs='+', help='Provider plugins to ingest payloads from'
    )

    # Add shared arguments so they still show up in the help dialog
    for subparser in [scan, run]:
        subparser.add_argument(
            '-A',
            '--dest-archivers',
            nargs='+',
            help='Archiver plugins to send payloads to',
        )
        subparser.add_argument(
            '-S',
            '--source-archivers',
            nargs='+',
            help='Archiver plugins to read payload from',
        )
        subparser.add_argument(
            '-D',
            '--decorators',
            nargs='+',
            help='Decorator plugins to send results to before saving',
        )
        subparser.add_argument(
            '-C', '--connectors', nargs='+', help='Connector plugins to send results to'
        )
        subparser.add_argument(
            '-R',
            '--dispatchers',
            nargs='+',
            help='Dispatcher plugins to use to send payloads to workers',
        )
        subparser.add_argument(
            '-a',
            '--always-dispatch',
            nargs='+',
            help='Worker plugins to always dispatch payloads to',
        )
        subparser.add_argument(
            '-s',
            '--start-dispatch',
            nargs='+',
            help='Worker plugins to add to the original payload dispatch',
        )
        subparser.add_argument(
            '--max-recursion',
            type=int,
            default=None,
            help='Maximum level of recursion into a payload and extracted payloads',
        )
        subparser.add_argument('--plugin-opts', nargs='+', help='Plugin options')
        subparser.add_argument(
            '--request-source',
            default=None,
            help='Source name to add to initial scan request',
        )
        subparser.add_argument(
            '--request-extra',
            nargs='+',
            help='Key/value pair to add to initial scan request metadata',
        )
        subparser.add_argument(
            '--plugin-dir', nargs='+', help='Directory(ies) containing stoQ plugins'
        )
        subparser.add_argument(
            '--config-file',
            default=f'{stoq_home}/stoq.cfg',
            help='Path to stoQ configuration file',
        )
        subparser.add_argument(
            '--log-level',
            default=None,
            choices=['debug', 'info', 'warning', 'error', 'critical'],
            help='Log level for stoQ events',
        )

    plugin_list = subparsers.add_parser('list', help='List available plugins')
    plugin_list.add_argument(
        '--plugin-dir', nargs='+', help='Directory(ies) containing stoQ plugins'
    )

    install = subparsers.add_parser('install', help='Install a given plugin')
    install.add_argument(
        'plugin_path', help='Directory or Github repo of the plugin to install'
    )
    install.add_argument(
        '--install_dir',
        default=os.path.join(stoq_home, 'plugins'),
        help='Override the default plugin installation directory',
    )
    install.add_argument(
        '--upgrade',
        action='store_true',
        help='Force the plugin to be upgraded if it already exists',
    )
    install.add_argument(
        '--github', action='store_true', help='Install plugin from Github repository'
    )

    subparsers.add_parser('test', help='Run stoQ tests')

    args = parser.parse_args()

    # Parse --plugin-opts entries of the form plugin_name:option=value
    plugin_opts: Union[Dict, None] = None
    try:
        if args.plugin_opts:
            plugin_opts = {}
            for arg in args.plugin_opts:
                plugin_name, plugin_option = arg.split(':', 1)
                opt, value = plugin_option.split('=', 1)
                if value.lower() == 'true':
                    value = True
                elif value.lower() == 'false':
                    value = False
                if plugin_name in plugin_opts:
                    plugin_opts[plugin_name].update({opt: value})
                else:
                    plugin_opts[plugin_name] = {opt: value}
    except AttributeError:
        pass
    except ValueError as err:
        print(f'Failed parsing plugin option: {err}')

    request_meta = RequestMeta()
    try:
        if args.request_source:
            request_meta.source = args.request_source
        if args.request_extra:
            for arg in args.request_extra:
                extra_key, extra_value = arg.split('=', 1)
                if extra_value.lower() == 'true':
                    extra_value = True
                elif extra_value.lower() == 'false':
                    extra_value = False
                request_meta.extra_data[extra_key] = extra_value
    except AttributeError:
        pass
    except ValueError as err:
        print(f'Failed parsing request metadata option: {err}')

    try:
        if not os.path.isfile(args.config_file):
            print(f'Warning: {args.config_file} does not exist, using stoQ defaults!')
    except AttributeError:
        pass

    if args.command == 'scan':
        with args.file as f:
            # Verify that the file or stdin has some sort of data
            if not select.select([f], [], [], 0.0)[0]:
                print('Error: No content to scan was provided')
                sys.exit(2)
            content = f.read()
        if not content:
            print('Error: The provided content to scan was empty')
            sys.exit(2)
        if args.file.name == '<stdin>':
            filename = None
        else:
            path = args.file.name
            try:
                filename = os.path.basename(path.encode('utf-8'))
            except AttributeError:
                filename = os.path.basename(path)
        stoq = Stoq(
            base_dir=stoq_home,
            config_file=args.config_file,
            log_level=args.log_level,
            plugin_opts=plugin_opts,
            source_archivers=args.source_archivers,
            dest_archivers=args.dest_archivers,
            connectors=args.connectors,
            dispatchers=args.dispatchers,
            decorators=args.decorators,
            always_dispatch=args.always_dispatch,
            max_recursion=args.max_recursion,
            plugin_dir_list=args.plugin_dir,
        )
        response = asyncio.get_event_loop().run_until_complete(
            stoq.scan(
                content,
                PayloadMeta(extra_data={'filename': filename}),
                request_meta=request_meta,
                add_start_dispatch=args.start_dispatch,
            )
        )
        if not args.connectors:
            print(response)
    elif args.command == 'run':
        stoq = Stoq(
            base_dir=stoq_home,
            config_file=args.config_file,
            log_level=args.log_level,
            plugin_opts=plugin_opts,
            providers=args.providers,
            source_archivers=args.source_archivers,
            dest_archivers=args.dest_archivers,
            connectors=args.connectors,
            dispatchers=args.dispatchers,
            decorators=args.decorators,
            always_dispatch=args.always_dispatch,
            max_recursion=args.max_recursion,
            plugin_dir_list=args.plugin_dir,
        )
        asyncio.get_event_loop().run_until_complete(
            stoq.run(request_meta=request_meta, add_start_dispatch=args.start_dispatch)
        )
    elif args.command == 'list':
        stoq = Stoq(base_dir=stoq_home, plugin_dir_list=args.plugin_dir)
        print(about)
        print('-' * len(about))
        for name, info in stoq.list_plugins().items():
            print(f'{name:<20s} v{info["version"]:<10s}{info["description"]}')
            print(f'\t\t\t\t- {", ".join(info["classes"]):<20s}')
    elif args.command == 'install':
        StoqPluginInstaller.install(
            args.plugin_path, args.install_dir, args.upgrade, args.github
        )
        print(f'Successfully installed {args.plugin_path} into {args.install_dir}')
    elif args.command == 'test':
        test_path = os.path.dirname(tests.__file__)
        test_suite = unittest.TestLoader().discover(test_path)
        unittest.TextTestRunner(verbosity=1).run(test_suite)
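# A standalone sketch of the plugin_name:option=value convention parsed in
# main() above, useful for testing the format in isolation. The plugin and
# option names in the doctest are illustrative, not a list of real plugins:

from typing import Any, Dict, List

def parse_plugin_opts(pairs: List[str]) -> Dict[str, Dict[str, Any]]:
    """
    >>> parse_plugin_opts(['yara:worker_rules=./rules.yar', 'yara:timeout=30'])
    {'yara': {'worker_rules': './rules.yar', 'timeout': '30'}}
    """
    opts: Dict[str, Dict[str, Any]] = {}
    for pair in pairs:
        plugin_name, plugin_option = pair.split(':', 1)
        opt, value = plugin_option.split('=', 1)
        # 'true'/'false' strings become real booleans, mirroring the CLI
        if value.lower() == 'true':
            value = True
        elif value.lower() == 'false':
            value = False
        opts.setdefault(plugin_name, {})[opt] = value
    return opts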
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    """
    Decompress a payload

    request_meta:
        - passwords
        - archiver

    """
    if len(payload.content) > self.maximum_size:
        raise StoqPluginException(
            f'Compressed file too large: {len(payload.content)} > {self.maximum_size}'
        )
    archiver = None
    mimetype = None
    results = {}
    errors = []
    extracted = []
    passwords = request_meta.extra_data.get('passwords', self.passwords)
    if isinstance(passwords, str):
        passwords = [p.strip() for p in passwords.split(',')]
    # Determine the mimetype of the payload so we can identify the
    # correct archiver. This should either be based off the request_meta
    # (useful when payload is passed via dispatching) or via magic
    if 'archiver' in request_meta.extra_data:
        if request_meta.extra_data['archiver'] in self.ARCHIVE_CMDS:
            archiver = self.ARCHIVE_CMDS[request_meta.extra_data['archiver']]
        else:
            raise StoqPluginException(
                f"Unknown archive type of {request_meta.extra_data['archiver']}"
            )
    else:
        mimetype = magic.from_buffer(payload.content, mime=True)
        if mimetype in self.ARCHIVE_MAGIC:
            archive_type = self.ARCHIVE_MAGIC[mimetype]
            if archive_type in self.ARCHIVE_CMDS:
                archiver = self.ARCHIVE_CMDS[archive_type]
            else:
                raise StoqPluginException(
                    f'Unknown archive type of {archive_type}'
                )
    if not archiver:
        raise StoqPluginException(
            f'Unable to determine archive type, mimetype: {mimetype}'
        )
    with tempfile.TemporaryDirectory() as extract_dir:
        fd, archive_file = tempfile.mkstemp(dir=extract_dir)
        with open(fd, 'xb') as f:
            f.write(payload.content)
            f.flush()
        archive_outdir = tempfile.mkdtemp(dir=extract_dir)
        # Try each password in turn until the archiver exits cleanly
        for password in passwords:
            cmd = archiver.replace('%INFILE%', shlex.quote(archive_file))
            cmd = cmd.replace('%OUTDIR%', shlex.quote(archive_outdir))
            cmd = cmd.replace('%PASSWORD%', shlex.quote(password))
            cmd = cmd.split(" ")
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
            try:
                outs, errs = p.communicate(timeout=self.timeout)
            except TimeoutExpired:
                p.kill()
                raise StoqPluginException('Timed out decompressing payload')
            if p.returncode == 0:
                break
        for root, dirs, files in os.walk(archive_outdir):
            for f in files:
                path = os.path.join(extract_dir, root, f)
                if os.path.getsize(path) > self.maximum_size:
                    errors.append(
                        f'Extracted object is too large ({os.path.getsize(path)} > {self.maximum_size})'
                    )
                    continue
                with open(path, "rb") as extracted_file:
                    meta = PayloadMeta(extra_data={'filename': f})
                    try:
                        data = extracted_file.read()
                    except OSError as err:
                        errors.append(f'Unable to access extracted content: {err}')
                        continue
                    extracted.append(ExtractedPayload(data, meta))
    return WorkerResponse(results, errors=errors, extracted=extracted)
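# The %INFILE%/%OUTDIR%/%PASSWORD% placeholders above come from the command
# templates in ARCHIVE_CMDS. A hedged sketch of what such a mapping might
# look like; the exact tools and flags here are assumptions, not the plugin's
# shipped configuration:

ARCHIVE_CMDS = {
    # 7-Zip: extract to %OUTDIR%, assume yes on prompts, supply password
    '7z': '7z x -o%OUTDIR% -y -p%PASSWORD% %INFILE%',
    # unzip: overwrite without prompting, supply password, extract to %OUTDIR%
    'zip': 'unzip -o -P %PASSWORD% -d %OUTDIR% %INFILE%',
}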
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    message_json: Dict[str, str] = {}
    attachments: List[ExtractedPayload] = []
    errors: List[str] = []
    ioc_content: str = ''
    session = UnicodeDammit(payload.content).unicode_markup
    message = Parser(policy=policy.default).parsestr(session)

    # Create a dict of the SMTP headers
    for header, value in message.items():
        curr_header = header.lower()
        if curr_header in message_json:
            message_json[curr_header] += f'\n{value}'
        else:
            message_json[curr_header] = value

    if not self.omit_body:
        message_json['body'] = self._get_body(message, 'plain')
        message_json['body_html'] = self._get_body(message, 'html')

    if self.extract_iocs:
        for k in self.ioc_keys:
            if k in message_json:
                ioc_content += f'\n{message_json[k]}'
            elif k == 'body' and k not in message_json:
                b = self._get_body(message, 'plain')
                if b:
                    ioc_content += b
            elif k == 'body_html' and k not in message_json:
                b = self._get_body(message, 'html')
                if b:
                    ioc_content += b

    for mailpart in message.iter_attachments():
        if mailpart.get_content_type() == 'message/rfc822':
            for part in mailpart.get_payload():
                try:
                    attachment_meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={
                            'charset': part.get_content_charset(),
                            'content-description': part.get('Content-Description'),
                            'disposition': part.get_content_disposition(),
                            'filename': part.get_filename(),
                            'type': part.get_content_type(),
                        },
                        dispatch_to=['smtp'],
                    )
                    attachment = ExtractedPayload(part.as_bytes(), attachment_meta)
                    attachments.append(attachment)
                except Exception as err:
                    errors.append(f'Failed rfc822 attachment: {err}')
        else:
            try:
                attachment_meta = PayloadMeta(
                    should_archive=self.archive_attachments,
                    extra_data={
                        'charset': mailpart.get_content_charset(),
                        'content-description': mailpart.get('Content-Description'),
                        'disposition': mailpart.get_content_disposition(),
                        'filename': mailpart.get_filename(),
                        'type': mailpart.get_content_type(),
                    },
                    dispatch_to=self.always_dispatch,
                )
                attachment = ExtractedPayload(mailpart.get_content(), attachment_meta)
                attachments.append(attachment)
            except Exception as err:
                errors.append(f'Failed extracting attachment: {err}')

    if self.extract_iocs:
        ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
        attachments.append(ExtractedPayload(ioc_content.encode(), ioc_meta))

    return WorkerResponse(message_json, errors=errors, extracted=attachments)
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    message_json = {}
    attachments = []
    errors = []
    ioc_content = ''
    email_session = UnicodeDammit(payload.content).unicode_markup
    message = pyzmail.message_from_string(email_session)

    # Create a dict of the SMTP headers
    for header in message.keys():
        curr_header = header.lower()
        if curr_header in message_json:
            # If the header key already exists, let's join them
            message_json[curr_header] += f'\n{message.get_decoded_header(header)}'
        else:
            message_json[curr_header] = message.get_decoded_header(header)

    if not self.omit_body:
        # Extract the e-mail body, to include HTML if available
        message_json['body'] = (
            ''
            if message.text_part is None
            else UnicodeDammit(message.text_part.get_payload()).unicode_markup
        )
        message_json['body_html'] = (
            ''
            if message.html_part is None
            else UnicodeDammit(message.html_part.get_payload()).unicode_markup
        )

    if self.extract_iocs:
        for k in self.ioc_keys:
            if k in message_json:
                ioc_content += f'{message_json[k]}\n'

    # Handle attachments
    for mailpart in message.mailparts:
        # Skip if the attachment is a body part
        if mailpart.is_body:
            if self.extract_iocs:
                ioc_content += UnicodeDammit(mailpart.get_payload()).unicode_markup
        elif mailpart.type.startswith('message/'):
            for part in mailpart.part.get_payload():
                try:
                    attachment_meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={'attached_msg': True},
                        dispatch_to=['smtp'],
                    )
                    attachment = ExtractedPayload(part.as_bytes(), attachment_meta)
                    attachments.append(attachment)
                except Exception as err:
                    errors.append(f'Failed extracting attachment: {err}')
        else:
            try:
                att_filename = mailpart.filename
                if not att_filename:
                    att_filename = mailpart.sanitized_filename
                attachment_meta = PayloadMeta(
                    should_archive=self.archive_attachments,
                    extra_data={
                        'charset': mailpart.charset,
                        'content-description': mailpart.part.get(
                            'Content-Description'
                        ),
                        'content-id': mailpart.content_id,
                        'disposition': mailpart.disposition,
                        'filename': att_filename,
                        'type': mailpart.type,
                    },
                    dispatch_to=self.always_dispatch,
                )
                attachment = ExtractedPayload(
                    mailpart.get_payload(), attachment_meta
                )
                attachments.append(attachment)
            except Exception as err:
                errors.append(f'Failed extracting attachment: {err}')

    if self.extract_iocs:
        ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
        attachments.append(ExtractedPayload(ioc_content.encode(), ioc_meta))

    return WorkerResponse(message_json, errors=errors, extracted=attachments)
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    message_json: Dict[str, str] = {}
    attachments: List[ExtractedPayload] = []
    errors: List[Error] = []
    ioc_content: str = ''
    session = UnicodeDammit(payload.content).unicode_markup
    message = Parser(policy=policy.default).parsestr(session)

    try:
        # Check for invalid date string
        # https://bugs.python.org/issue30681
        message.get('Date')
    except TypeError:
        date_header = [d[1] for d in message._headers if d[0] == 'Date'][0]
        date_header = dtparse(date_header).strftime('%c %z')
        message.replace_header('Date', date_header)

    # Create a dict of the SMTP headers
    for header, value in message.items():
        curr_header = header.lower()
        if curr_header in message_json:
            message_json[curr_header] += f'\n{value}'
        else:
            message_json[curr_header] = value

    if not self.omit_body:
        message_json['body'] = self._get_body(message, 'plain')
        message_json['body_html'] = self._get_body(message, 'html')

    if self.extract_iocs:
        for k in self.ioc_keys:
            if k in message_json:
                ioc_content += f'\n{message_json[k]}'
            elif k == 'body' and k not in message_json:
                b = self._get_body(message, 'plain')
                if b:
                    ioc_content += b
            elif k == 'body_html' and k not in message_json:
                b = self._get_body(message, 'html')
                if b:
                    ioc_content += b

    for mailpart in message.iter_attachments():
        if mailpart.get_content_type() == 'message/rfc822':
            for part in mailpart.get_payload():
                try:
                    attachment_meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={
                            'charset': part.get_content_charset(),
                            'content-description': part.get('Content-Description'),
                            'disposition': part.get_content_disposition(),
                            'filename': part.get_filename(),
                            'type': part.get_content_type(),
                        },
                        dispatch_to=['smtp'],
                    )
                    attachment = ExtractedPayload(part.as_bytes(), attachment_meta)
                    attachments.append(attachment)
                except Exception as err:
                    errors.append(
                        Error(
                            error=f'Failed rfc822 attachment: {err}',
                            plugin_name=self.plugin_name,
                            payload_id=payload.results.payload_id,
                        )
                    )
        else:
            try:
                attachment_meta = PayloadMeta(
                    should_archive=self.archive_attachments,
                    extra_data={
                        'charset': mailpart.get_content_charset(),
                        'content-description': mailpart.get('Content-Description'),
                        'disposition': mailpart.get_content_disposition(),
                        'filename': mailpart.get_filename(),
                        'type': mailpart.get_content_type(),
                    },
                    dispatch_to=self.always_dispatch,
                )
                attachment = ExtractedPayload(mailpart.get_content(), attachment_meta)
                attachments.append(attachment)
            except Exception as err:
                errors.append(
                    Error(
                        error=f'Failed extracting attachment: {err}',
                        plugin_name=self.plugin_name,
                        payload_id=payload.results.payload_id,
                    )
                )

    if self.extract_iocs:
        ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
        attachments.append(ExtractedPayload(ioc_content.encode(), ioc_meta))

    return WorkerResponse(message_json, errors=errors, extracted=attachments)
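# Both smtp scan variants above rely on a _get_body() helper that is not
# shown. A minimal sketch using the stdlib EmailMessage API; this is an
# assumption about the helper's behavior, not the plugin's verbatim
# implementation:

from email.message import EmailMessage
from typing import Optional

def _get_body(message: EmailMessage, subtype: str) -> Optional[str]:
    """Return the decoded text of the 'plain' or 'html' body part, if present."""
    body_part = message.get_body(preferencelist=(subtype,))
    if body_part is None:
        return None
    return body_part.get_content()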