async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Parse a vtmis-filefeed record and optionally fetch the sample it names

        The payload content is decoded as JSON and returned as the results.
        When ``self.download`` is truthy, the URL stored under the record's
        ``link`` key is requested and the response body becomes the single
        extracted payload. Any exception raised while downloading is captured
        as an ``Error`` instead of propagating.

        """
        results: Dict = json.loads(payload.content)
        carved: List[ExtractedPayload] = []
        failures: List[Error] = []
        if not self.download:
            # Nothing to fetch: hand back the parsed record alone.
            return WorkerResponse(results=results,
                                  errors=failures,
                                  extracted=carved)

        self.log.info(f'Downloading VTMIS sample sha1: {results["sha1"]}')
        try:
            reply = requests.get(results['link'])
            reply.raise_for_status()
            carved = [ExtractedPayload(reply.content)]
        except Exception as err:
            failure = Error(
                error=f'Unable to download sample {results["sha1"]}: {err}',
                plugin_name=self.plugin_name,
                payload_id=payload.results.payload_id,
            )
            failures.append(failure)
        return WorkerResponse(results=results,
                              errors=failures,
                              extracted=carved)
 async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
     """
     Carve the text content of configured XML elements out of the payload

     The payload is parsed as XML. For every tag name in ``self.elements``,
     the value of each matching element's first child node is stripped of
     trailing whitespace and base64-decoded when possible; if decoding
     fails the stripped text is kept as-is. Each value is returned as an
     extracted payload tagged with its ``element_name``.

     A payload that cannot be parsed yields a response carrying a single
     ``Error`` and no extracted payloads.

     """
     extracted: List[ExtractedPayload] = []
     errors: List[Error] = []
     try:
         parsed_xml = parseString(payload.content)
     except ExpatError as err:
         errors.append(
             Error(
                 error=f'Unable to parse payload as XML with xdpcarve: {err}',
                 plugin_name=self.plugin_name,
                 payload_id=payload.results.payload_id,
             )
         )
         return WorkerResponse(errors=errors)
     for name in self.elements:
         for dom in parsed_xml.getElementsByTagName(name):
             node = dom.firstChild
             # An empty element has no child, and a child that is itself
             # an element has no nodeValue; neither carries content, so
             # skip it instead of failing the whole scan.
             if node is None or node.nodeValue is None:
                 continue
             content = node.nodeValue.rstrip()
             try:
                 content = base64.b64decode(content)
             except ValueError:
                 # Not valid base64 (binascii.Error is a ValueError):
                 # best effort, keep the stripped text unchanged.
                 pass
             meta = PayloadMeta(extra_data={'element_name': name})
             extracted.append(ExtractedPayload(content, meta))
     return WorkerResponse(extracted=extracted, errors=errors)
Example #3
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        XOR the payload content with a rolling key from the dispatch metadata

        The key is looked up under ``**/xorkey`` in ``payload.dispatch_meta``.
        It may be a list of ints, a single int, or a comma-separated string
        of ints. When no key is found (or it is falsy) nothing is returned.
        Otherwise the first ``payload.results.size`` bytes are XORed with the
        key, cycling through it, and the result is returned as one extracted
        payload. The normalized key is also recorded in the payload's own
        ``extra_data``.

        """
        key: Union[List[int], str, int, None] = dpath.util.get(
            payload.dispatch_meta, '**/xorkey', default=None)

        if not key:
            return
        if isinstance(key, str):
            key = [int(piece.strip()) for piece in key.split(',')]
        elif isinstance(key, int):
            key = [key]

        key_length = len(key)
        decoded = bytearray(payload.content)
        for position in range(payload.results.size):
            # position % key_length walks the key and wraps back to its start
            decoded[position] ^= key[position % key_length]

        payload.results.payload_meta.extra_data['xorkey'] = key
        carved = ExtractedPayload(bytes(decoded),
                                  PayloadMeta(extra_data={'xorkey': key}))
        return WorkerResponse(extracted=[carved])
Example #4
0
    def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
        """
        Upload content to a Tika server for automated text extraction

        The payload content is sent with an HTTP PUT to ``self.tika_url`` and
        the response body is returned as the single extracted payload.

        Exceptions raised by ``requests`` (including the one raised by
        ``raise_for_status`` on an error reply) propagate to the caller.

        """
        response = requests.put(self.tika_url, data=payload.content)
        response.raise_for_status()
        # ``extracted`` is a list of payloads, not a bare ExtractedPayload
        extracted = [ExtractedPayload(response.content)]
        return WorkerResponse(extracted=extracted)
Example #5
0
 async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
     """
     Extract every stream of an OLE container as its own payload

     Each stream is read and named after its path components joined with
     ``_`` (non-printable characters removed). When the stream content ends
     with the Ole10Native marker it is parsed with
     ``oleobj.OleNativeStream`` and the embedded native data is extracted
     instead of the raw stream; the name is then suffixed with the embedded
     filename, or with ``olenative`` when there is none.

     A failure on one stream is recorded as an ``Error`` and does not stop
     the remaining streams from being processed.

     """
     extracted: List[ExtractedPayload] = []
     errors: List[Error] = []
     ole_object = olefile.OleFileIO(payload.content)
     try:
         # enumerate() yields the stream's position directly instead of
         # searching the listing again for every stream.
         for index, stream in enumerate(ole_object.listdir(streams=True)):
             try:
                 stream_buffer = ole_object.openstream(stream).read()
                 name = ''.join(
                     filter(lambda x: x in string.printable,
                            '_'.join(stream)))
                 data = stream_buffer
                 if stream_buffer.endswith(b'\x01Ole10Native'):
                     ole_native = oleobj.OleNativeStream(stream_buffer)
                     if ole_native.filename:
                         name = f'{name}_{str(ole_native.filename)}'
                     else:
                         name = f'{name}_olenative'
                     data = ole_native.data
                 meta = PayloadMeta(
                     should_archive=False,
                     extra_data={
                         'index': index,
                         'name': name
                     },
                 )
                 extracted.append(ExtractedPayload(data, meta))
             except Exception as err:
                 errors.append(
                     Error(
                         error=str(err),
                         plugin_name=self.plugin_name,
                         payload_id=payload.results.payload_id,
                     ))
     finally:
         # Release the parser's underlying file object in every case.
         ole_object.close()
     return WorkerResponse(extracted=extracted, errors=errors)
Example #6
0
 async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
     """
     Extract the attachments of a TNEF payload

     The payload content is parsed with ``TNEF``. Every attachment's data is
     returned as an extracted payload whose ``filename`` metadata is the
     attachment name converted to unicode; when that conversion fails the
     literal string ``"None"`` is used instead.

     """
     extracted: List[ExtractedPayload] = []
     tnef_results = TNEF(payload.content)
     for tnef_attachment in tnef_results.attachments or []:
         try:
             filename = UnicodeDammit(tnef_attachment.name).unicode_markup
         except Exception:
             # Best effort: an undecodable name must not lose the data.
             # Exception (not a bare except) lets KeyboardInterrupt and
             # SystemExit through.
             filename = "None"
         tnef_meta = PayloadMeta(extra_data={'filename': filename})
         extracted.append(ExtractedPayload(tnef_attachment.data, tnef_meta))
     return WorkerResponse(extracted=extracted)
Example #7
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Extract the objects embedded in an RTF payload

        The payload is parsed with ``rtfobj.RtfObjParser``. For each parsed
        object the OLE data, the OLE package data, or the raw data is taken
        (checked in that order) and returned as an extracted payload. The
        metadata always carries the object's ``index``; package objects also
        carry their ``filename``.

        """
        parser = rtfobj.RtfObjParser(payload.content)
        parser.parse()

        carved: List[ExtractedPayload] = []
        for position, rtf_object in enumerate(parser.objects):
            details = {'index': position}
            if rtf_object.is_ole:
                blob = rtf_object.oledata
            elif rtf_object.is_package:
                blob = rtf_object.olepkgdata
                details['filename'] = rtf_object.filename
            else:
                blob = rtf_object.rawdata
            carved.append(
                ExtractedPayload(blob, PayloadMeta(extra_data=details)))
        return WorkerResponse(extracted=carved)
Example #8
0
 def scan(self, payload: Payload,
          request_meta: RequestMeta) -> WorkerResponse:
     """
     Carve the text content of configured XML elements out of the payload

     The payload is parsed as XML. For every tag name in ``self.elements``,
     the value of each matching element's first child node is stripped of
     trailing whitespace and base64-decoded when possible; if decoding
     fails the stripped text is kept as-is. Each value is returned as an
     extracted payload tagged with its ``element_name``.

     A payload that cannot be parsed yields a response carrying a single
     error message and no extracted payloads.

     """
     extracted = []
     errors = []
     try:
         parsed_xml = parseString(payload.content)
     except ExpatError as err:
         errors.append(
             f'Unable to parse payload as XML with xdpcarve: {err}')
         return WorkerResponse(errors=errors)
     for name in self.elements:
         for dom in parsed_xml.getElementsByTagName(name):
             node = dom.firstChild
             # An empty element has no child, and a child that is itself
             # an element has no nodeValue; neither carries content, so
             # skip it instead of failing the whole scan.
             if node is None or node.nodeValue is None:
                 continue
             content = node.nodeValue.rstrip()
             try:
                 content = base64.b64decode(content)
             except ValueError:
                 # Not valid base64 (binascii.Error is a ValueError):
                 # best effort, keep the stripped text unchanged.
                 pass
             meta = PayloadMeta(extra_data={"element_name": name})
             extracted.append(ExtractedPayload(content, meta))
     return WorkerResponse(extracted=extracted, errors=errors)
Example #9
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Carve PE files from provided payload

        For every ``(start, end)`` pair yielded by ``self._carve``, the
        content from ``start`` to the end of the payload is handed to
        ``pefile.PE``. Candidates that fail to parse are skipped; the rest
        are trimmed with ``PE.trim()`` and returned as extracted payloads
        tagged with the ``offset`` they were found at.

        """

        extracted = []
        content = BytesIO(payload.content)
        content.seek(0)

        for start, _end in self._carve(content):
            content.seek(start)
            try:
                pe = pefile.PE(data=content.read())
            except Exception:
                # Not a parseable PE at this offset; try the next candidate.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit through.
                continue
            try:
                meta = PayloadMeta(extra_data={'offset': start})
                extracted.append(ExtractedPayload(pe.trim(), meta))
                content.seek(0)
            finally:
                # Release the parsed PE even if trimming fails.
                pe.close()

        return WorkerResponse(extracted=extracted)
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Parse a vtmis-filefeed record and optionally fetch the sample it names

        The payload content is decoded as JSON and returned as the results.
        When ``self.download`` is truthy, the URL stored under the record's
        ``link`` key is requested and the response body becomes the single
        extracted payload; a failed download is reported as an error string
        rather than raised. ``extracted`` and ``errors`` remain ``None`` when
        they are not populated.

        """
        results = json.loads(payload.content)
        carved: Union[List[ExtractedPayload], None] = None
        failures: Union[List[str], None] = None
        if not self.download:
            # Nothing to fetch: hand back the parsed record alone.
            return WorkerResponse(results=results,
                                  errors=failures,
                                  extracted=carved)

        log.info(f'Downloading VTMIS sample sha1: {results["sha1"]}')
        try:
            reply = requests.get(results['link'])
            reply.raise_for_status()
            carved = [ExtractedPayload(reply.content)]
        except Exception as err:
            failures = [f'Unable to download sample {results["sha1"]}: {err}']
        return WorkerResponse(results=results,
                              errors=failures,
                              extracted=carved)
Example #11
0
    def decompress(self, content: BytesIO, offset: int = 0):
        """
        Extract and decompress an SWF object

        Reads the 8-byte SWF header found at ``offset`` in ``content``,
        decompresses the body according to the signature (``ZWS`` = LZMA,
        ``CWS`` = zlib, ``FWS`` = uncompressed) and rebuilds an uncompressed
        SWF by prefixing the body with an ``FWS`` header that keeps the
        original version and length fields.

        Returns a tuple ``(extracted, errors)``. ``extracted`` is an
        ``ExtractedPayload`` carrying ``offset`` and ``swf_version``
        metadata, or ``None`` when nothing could be carved. ``errors`` is a
        list of messages; it stays empty when the signature is unknown or
        the decompressor rejects the data.

        """
        errors = []
        # Bound up front so every exit path has something to return.
        extracted = None
        try:
            # Header as obtained from SWF File Specification:
            #   Signature  UI8   "F" uncompressed, "C" zlib compressed
            #                    (SWF 6 and later), "Z" LZMA compressed
            #                    (SWF 13 and later)
            #   Signature  UI8   always "W"
            #   Signature  UI8   always "S"
            #   Version    UI8   single byte file version (0x06 for SWF 6)
            #   FileLength UI32  length of entire file in bytes

            # Jump to the proper offset
            content.seek(offset)
            # Grab the first three bytes, should be FWS, CWS or ZWS
            magic = content.read(3).decode()
            # Grab the SWF version - 1 unsigned byte (UI8)
            swf_version = struct.unpack('<B', content.read(1))[0]
            # Grab next 4 bytes (UI32) so we can calculate the uncompressed
            # size of the payload, minus the 8 header bytes.
            decompressed_size = struct.unpack('<I', content.read(4))[0] - 8
            # Go back to just past the signature so the version and length
            # fields can be reused under an uncompressed 'FWS' signature.
            content.seek(offset + 3)
            composite_header = b'FWS' + content.read(5)
            # Determine the compression type, ZLIB or LZMA, then decompress
            try:
                if magic == 'ZWS':
                    # Skip the 4 bytes that follow the header, relative to
                    # where this SWF starts rather than to the whole buffer.
                    content.seek(offset + 12)
                    body = pylzma.decompress(
                        content.read(decompressed_size))
                elif magic == 'CWS':
                    body = zlib.decompress(content.read(decompressed_size))
                elif magic == 'FWS':
                    # Not compressed, but let's return the payload based on
                    # the size defined in the header
                    body = content.read(decompressed_size)
                else:
                    return None, errors
            except Exception:
                # A false-positive signature or corrupt stream: nothing to
                # carve here, and not worth reporting as an error.
                return None, errors

            if len(body) != decompressed_size:
                errors.append(
                    f'Invalid size of carved SWF content: {len(body)} != {decompressed_size}'
                )
            else:
                meta = PayloadMeta(extra_data={
                    'offset': offset,
                    'swf_version': swf_version
                })
                extracted = ExtractedPayload(composite_header + body, meta)
        except Exception:
            errors.append(
                f'Unable to decompress SWF payload at offset {offset}')
        return extracted, errors
Example #12
0
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Decompress a payload

        request_meta:
            - passwords
            - archiver

        The archiver command is taken from
        ``request_meta.extra_data['archiver']`` when present, otherwise it
        is derived from the payload's magic mimetype. The payload is written
        to a temporary directory and the archiver is run once per candidate
        password until one run exits with status 0. Every file found in the
        output directory is returned as an extracted payload, except files
        larger than ``self.maximum_size``, which are reported in ``errors``.

        Raises ``StoqPluginException`` when the payload is larger than
        ``self.maximum_size``, when the archive type is unknown or cannot be
        determined, or when the archiver runs longer than ``self.timeout``.
        """

        if len(payload.content) > self.maximum_size:
            raise StoqPluginException(
                f'Compressed file too large: {len(payload.content)} > {self.maximum_size}'
            )

        archiver = None
        mimetype = None
        results = {}
        errors = []
        extracted = []
        passwords = request_meta.extra_data.get('passwords', self.passwords)
        if isinstance(passwords, str):
            passwords = [p.strip() for p in passwords.split(',')]

        # Determine the mimetype of the payload so we can identify the
        # correct archiver. This should either be based off the request_meta
        # (useful when payload is passed via dispatching) or via magic
        if 'archiver' in request_meta.extra_data:
            requested = request_meta.extra_data['archiver']
            if requested in self.ARCHIVE_CMDS:
                archiver = self.ARCHIVE_CMDS[requested]
            else:
                # Report the requested name from extra_data; subscripting
                # request_meta itself would fail before this could be raised.
                raise StoqPluginException(
                    f"Unknown archive type of {requested}")
        else:
            mimetype = magic.from_buffer(payload.content, mime=True)
            if mimetype in self.ARCHIVE_MAGIC:
                archive_type = self.ARCHIVE_MAGIC[mimetype]
                if archive_type in self.ARCHIVE_CMDS:
                    archiver = self.ARCHIVE_CMDS[archive_type]
                else:
                    raise StoqPluginException(
                        f'Unknown archive type of {archive_type}')
        if not archiver:
            raise StoqPluginException(
                f'Unable to determine archive type, mimetype: {mimetype}')

        with tempfile.TemporaryDirectory() as extract_dir:
            fd, archive_file = tempfile.mkstemp(dir=extract_dir)
            with open(fd, 'xb') as archive:
                archive.write(payload.content)
                archive.flush()
            archive_outdir = tempfile.mkdtemp(dir=extract_dir)
            for password in passwords:
                # NOTE(review): the command is run without a shell, so the
                # quotes shlex.quote adds around a value with special
                # characters reach the archiver literally, and a quoted
                # value containing spaces is split apart below - confirm
                # whether such passwords are expected.
                cmd = archiver.replace('%INFILE%', shlex.quote(archive_file))
                cmd = cmd.replace('%OUTDIR%', shlex.quote(archive_outdir))
                cmd = cmd.replace('%PASSWORD%', shlex.quote(password))
                cmd = cmd.split(" ")
                p = Popen(cmd,
                          stdout=PIPE,
                          stderr=PIPE,
                          universal_newlines=True)
                try:
                    p.communicate(timeout=self.timeout)
                except TimeoutExpired:
                    p.kill()
                    # Reap the killed child and drain its pipes so it does
                    # not linger as a zombie process.
                    p.communicate()
                    raise StoqPluginException(
                        'Timed out decompressing payload')
                if p.returncode == 0:
                    break

            for root, _dirs, files in os.walk(archive_outdir):
                for filename in files:
                    # root already starts with archive_outdir
                    path = os.path.join(root, filename)
                    size = os.path.getsize(path)
                    if size > self.maximum_size:
                        errors.append(
                            f'Extracted object is too large ({size} > {self.maximum_size})'
                        )
                        continue
                    with open(path, "rb") as extracted_file:
                        meta = PayloadMeta(extra_data={'filename': filename})
                        try:
                            data = extracted_file.read()
                        except OSError as err:
                            errors.append(
                                f'Unable to access extracted content: {err}')
                            continue
                        extracted.append(ExtractedPayload(data, meta))
        return WorkerResponse(results, errors=errors, extracted=extracted)
Example #13
0
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Parse an e-mail message into headers, bodies and attachments

        The payload is converted to text and parsed with the standard
        library e-mail parser. Headers are collected into a dict keyed by
        lower-cased name (repeated headers are joined with newlines), and
        the plain and HTML bodies are added unless ``self.omit_body`` is
        set. Attachments are returned as extracted payloads; the parts of a
        ``message/rfc822`` attachment are dispatched back to ``smtp``. When
        ``self.extract_iocs`` is set, the values of ``self.ioc_keys`` are
        concatenated into one extra payload dispatched to ``iocextract``.

        A failure on a single attachment is recorded in ``errors``.

        """

        def describe(part) -> Dict:
            # Metadata shared by every kind of extracted attachment
            return {
                'charset': part.get_content_charset(),
                'content-description': part.get('Content-Description'),
                'disposition': part.get_content_disposition(),
                'filename': part.get_filename(),
                'type': part.get_content_type(),
            }

        headers: Dict[str, str] = {}
        carved: List[ExtractedPayload] = []
        failures: List[str] = []
        ioc_text: str = ''
        raw_message = UnicodeDammit(payload.content).unicode_markup
        message = Parser(policy=policy.default).parsestr(raw_message)

        # Build the header dict, joining the values of repeated headers
        for header, value in message.items():
            key = header.lower()
            if key not in headers:
                headers[key] = value
            else:
                headers[key] += f'\n{value}'

        if not self.omit_body:
            headers['body'] = self._get_body(message, 'plain')
            headers['body_html'] = self._get_body(message, 'html')

        if self.extract_iocs:
            # Bodies left out of the results are fetched on demand
            body_subtypes = {'body': 'plain', 'body_html': 'html'}
            for key in self.ioc_keys:
                if key in headers:
                    ioc_text += f'\n{headers[key]}'
                elif key in body_subtypes:
                    text = self._get_body(message, body_subtypes[key])
                    if text:
                        ioc_text += text

        for mailpart in message.iter_attachments():
            if mailpart.get_content_type() != 'message/rfc822':
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data=describe(mailpart),
                        dispatch_to=self.always_dispatch,
                    )
                    carved.append(
                        ExtractedPayload(mailpart.get_content(), meta))
                except Exception as err:
                    failures.append(f'Failed extracting attachment: {err}')
                continue
            # An attached message: hand each of its parts back to smtp
            for part in mailpart.get_payload():
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data=describe(part),
                        dispatch_to=['smtp'],
                    )
                    carved.append(ExtractedPayload(part.as_bytes(), meta))
                except Exception as err:
                    failures.append(f'Failed rfc822 attachment: {err}')

        if self.extract_iocs:
            ioc_meta = PayloadMeta(should_archive=False,
                                   dispatch_to=['iocextract'])
            carved.append(ExtractedPayload(ioc_text.encode(), ioc_meta))
        return WorkerResponse(headers, errors=failures, extracted=carved)
Example #14
0
    def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
        """
        Parse an e-mail message into headers, bodies and attachments

        The payload is converted to text and parsed with pyzmail. Decoded
        headers are collected into a dict keyed by lower-cased name
        (repeated headers are joined with newlines), and the text and HTML
        bodies are added unless ``self.omit_body`` is set. Non-body parts
        are returned as extracted payloads; the parts of a ``message/*``
        attachment are dispatched back to ``smtp``. When
        ``self.extract_iocs`` is set, the collected ``self.ioc_keys`` values
        and the body parts are concatenated into one extra payload
        dispatched to ``iocextract``.

        A failure on a single attachment is recorded in ``errors``.

        """
        headers = {}
        carved = []
        failures = []
        ioc_text = ''
        raw_message = UnicodeDammit(payload.content).unicode_markup
        message = pyzmail.message_from_string(raw_message)

        # Build the header dict, joining the values of repeated headers
        for header in message.keys():
            key = header.lower()
            if key not in headers:
                headers[key] = message.get_decoded_header(header)
            else:
                headers[key] += f'\n{message.get_decoded_header(header)}'

        if not self.omit_body:
            # Plain text body, empty when the message has none
            if message.text_part is None:
                headers['body'] = ''
            else:
                headers['body'] = UnicodeDammit(
                    message.text_part.get_payload()
                ).unicode_markup
            # HTML body, empty when the message has none
            if message.html_part is None:
                headers['body_html'] = ''
            else:
                headers['body_html'] = UnicodeDammit(
                    message.html_part.get_payload()
                ).unicode_markup

        if self.extract_iocs:
            for key in self.ioc_keys:
                if key in headers:
                    ioc_text += f'{headers[key]}\n'

        for mailpart in message.mailparts:
            if mailpart.is_body:
                # Body parts are not attachments; they only feed the IOC text
                if self.extract_iocs:
                    ioc_text += UnicodeDammit(mailpart.get_payload()).unicode_markup
                continue

            if mailpart.type.startswith('message/'):
                # An attached message: hand each of its parts back to smtp
                for part in mailpart.part.get_payload():
                    try:
                        meta = PayloadMeta(
                            should_archive=self.archive_attachments,
                            extra_data={'attached_msg': True},
                            dispatch_to=['smtp'],
                        )
                        carved.append(ExtractedPayload(part.as_bytes(), meta))
                    except Exception as err:
                        failures.append(f'Failed extracting attachment: {err}')
                continue

            try:
                # Fall back to the sanitized name when none was declared
                att_filename = mailpart.filename or mailpart.sanitized_filename
                meta = PayloadMeta(
                    should_archive=self.archive_attachments,
                    extra_data={
                        'charset': mailpart.charset,
                        'content-description': mailpart.part.get(
                            'Content-Description'
                        ),
                        'content-id': mailpart.content_id,
                        'disposition': mailpart.disposition,
                        'filename': att_filename,
                        'type': mailpart.type,
                    },
                    dispatch_to=self.always_dispatch,
                )
                carved.append(ExtractedPayload(mailpart.get_payload(), meta))
            except Exception as err:
                failures.append(f'Failed extracting attachment: {err}')

        if self.extract_iocs:
            ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
            carved.append(ExtractedPayload(ioc_text.encode(), ioc_meta))

        return WorkerResponse(headers, errors=failures, extracted=carved)
Example #15
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Parse an e-mail message into headers, bodies and attachments

        The payload is converted to text and parsed with the standard
        library e-mail parser. A ``Date`` header that cannot be read is
        re-parsed with ``dtparse`` and rewritten first. Headers are then
        collected into a dict keyed by lower-cased name (repeated headers
        are joined with newlines), and the plain and HTML bodies are added
        unless ``self.omit_body`` is set. Attachments are returned as
        extracted payloads; the parts of a ``message/rfc822`` attachment are
        dispatched back to ``smtp``. When ``self.extract_iocs`` is set, the
        values of ``self.ioc_keys`` are concatenated into one extra payload
        dispatched to ``iocextract``.

        A failure on a single attachment is recorded as an ``Error``.

        """

        def describe(part) -> Dict:
            # Metadata shared by every kind of extracted attachment
            return {
                'charset': part.get_content_charset(),
                'content-description': part.get('Content-Description'),
                'disposition': part.get_content_disposition(),
                'filename': part.get_filename(),
                'type': part.get_content_type(),
            }

        headers: Dict[str, str] = {}
        carved: List[ExtractedPayload] = []
        failures: List[Error] = []
        ioc_text: str = ''
        raw_message = UnicodeDammit(payload.content).unicode_markup
        message = Parser(policy=policy.default).parsestr(raw_message)

        try:
            # Reading the header is what trips over an invalid date string
            # https://bugs.python.org/issue30681
            message.get('Date')
        except TypeError:
            raw_dates = [h[1] for h in message._headers if h[0] == 'Date']
            fixed_date = dtparse(raw_dates[0]).strftime('%c %z')
            message.replace_header('Date', fixed_date)

        # Build the header dict, joining the values of repeated headers
        for header, value in message.items():
            key = header.lower()
            if key not in headers:
                headers[key] = value
            else:
                headers[key] += f'\n{value}'

        if not self.omit_body:
            headers['body'] = self._get_body(message, 'plain')
            headers['body_html'] = self._get_body(message, 'html')

        if self.extract_iocs:
            # Bodies left out of the results are fetched on demand
            body_subtypes = {'body': 'plain', 'body_html': 'html'}
            for key in self.ioc_keys:
                if key in headers:
                    ioc_text += f'\n{headers[key]}'
                elif key in body_subtypes:
                    text = self._get_body(message, body_subtypes[key])
                    if text:
                        ioc_text += text

        for mailpart in message.iter_attachments():
            if mailpart.get_content_type() != 'message/rfc822':
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data=describe(mailpart),
                        dispatch_to=self.always_dispatch,
                    )
                    carved.append(
                        ExtractedPayload(mailpart.get_content(), meta))
                except Exception as err:
                    failures.append(
                        Error(
                            error=f'Failed extracting attachment: {err}',
                            plugin_name=self.plugin_name,
                            payload_id=payload.results.payload_id,
                        ))
                continue
            # An attached message: hand each of its parts back to smtp
            for part in mailpart.get_payload():
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data=describe(part),
                        dispatch_to=['smtp'],
                    )
                    carved.append(ExtractedPayload(part.as_bytes(), meta))
                except Exception as err:
                    failures.append(
                        Error(
                            error=f'Failed rfc822 attachment: {err}',
                            plugin_name=self.plugin_name,
                            payload_id=payload.results.payload_id,
                        ))

        if self.extract_iocs:
            ioc_meta = PayloadMeta(should_archive=False,
                                   dispatch_to=['iocextract'])
            carved.append(ExtractedPayload(ioc_text.encode(), ioc_meta))
        return WorkerResponse(headers, errors=failures, extracted=carved)
Example #16
0
 async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
     """
     Base64-decode the payload content

     The decoded bytes are returned as the single extracted payload.

     """
     carved = ExtractedPayload(base64.b64decode(payload.content))
     return WorkerResponse(extracted=[carved])