Example #1
0
    def scan(self, data, file, options, expire_at):
        """Parse TNEF data, record object metadata, and extract embedded files.

        The descriptive name of every TNEF object is recorded in
        ``event['object_names']``, and the 'Subject', 'Message ID' and
        'Message Class' values are copied into the event. Each attachment,
        and the HTML body when one is present, is uploaded as a new file.

        Args:
            data: Raw content handed to ``tnefparse.TNEF``.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        # Descriptive TNEF object name -> event key that stores its value.
        event_keys = {
            'Subject': 'subject',
            'Message ID': 'message_id',
            'Message Class': 'message_class',
        }

        self.event['total'] = {'attachments': 0, 'extracted': 0}
        self.event.setdefault('object_names', [])

        tnef = tnefparse.TNEF(data)
        for tnef_object in getattr(tnef, 'objects', []):
            descriptive_name = tnefparse.TNEF.codes.get(tnef_object.name)
            if descriptive_name not in self.event['object_names']:
                self.event['object_names'].append(descriptive_name)

            try:
                # Strip NUL padding; a value that ends up empty becomes None.
                object_data = tnef_object.data.strip(b'\0') or None
            except (AttributeError, TypeError):
                # The value has no strip() or rejects a bytes argument, so
                # it is kept as-is. Narrow on purpose: a bare except would
                # also swallow KeyboardInterrupt / SystemExit.
                object_data = tnef_object.data

            if object_data is not None and descriptive_name in event_keys:
                self.event[event_keys[descriptive_name]] = object_data

        tnef_attachments = getattr(tnef, 'attachments', [])
        self.event['total']['attachments'] = len(tnef_attachments)
        for attachment in tnef_attachments:
            extract_file = strelka.File(
                name=attachment.name.decode(),
                source=self.name,
            )

            for c in strelka.chunk_string(attachment.data):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
            self.event['total']['extracted'] += 1

        # The HTML body is uploaded but not counted in total['extracted'].
        tnef_html = getattr(tnef, 'htmlbody', None)
        if tnef_html is not None:
            extract_file = strelka.File(
                name='htmlbody',
                source=self.name,
            )

            for c in strelka.chunk_string(tnef_html):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
Example #2
0
    def scan(self, data, file, options, expire_at):
        """Extract the objects embedded in RTF data.

        Every object found by ``rtfobj.RtfObjParser`` is uploaded as a new
        file: packages use their own filename and package data, OLE objects
        their OLE data, and anything else its raw data. Objects without a
        filename are named ``object_<position>``.

        Args:
            data: Raw content handed to ``rtfobj.RtfObjParser``.
            file: Not used by this method.
            options: Scanner options; ``limit`` (default 1000) caps the
                number of extracted objects.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'objects': 0, 'extracted': 0}

        rtf = rtfobj.RtfObjParser(data)
        rtf.parse()
        self.event['total']['objects'] = len(rtf.objects)

        # enumerate() yields each object's position in rtf.objects, which is
        # the only object collection this method reads from the parser.
        for index, rtf_object in enumerate(rtf.objects):
            if self.event['total']['extracted'] >= file_limit:
                break

            if rtf_object.is_package:
                extract_name = rtf_object.filename
                extract_data = rtf_object.olepkgdata
            elif rtf_object.is_ole:
                extract_name = f'object_{index}'
                extract_data = rtf_object.oledata
            else:
                extract_name = f'object_{index}'
                extract_data = rtf_object.rawdata

            extract_file = strelka.File(
                name=extract_name,
                source=self.name,
            )

            for c in strelka.chunk_string(extract_data):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
            self.event['total']['extracted'] += 1
Example #3
0
    def scan(self, data, file, options, expire_at):
        """Extract every stream of an OLE container as a new file.

        Stream names are built by joining the stream path with '_' and
        removing control characters. Streams whose name ends in
        'Ole10Native' are parsed with ``oletools.oleobj.OleNativeStream``
        and the native payload is extracted instead of the raw stream.

        Args:
            data: Raw content handed to ``olefile.OleFileIO``.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        self.event['total'] = {'streams': 0, 'extracted': 0}

        # Bound before the try so the finally clause can tell whether the
        # container was ever opened; otherwise a failure in OleFileIO()
        # would be masked by an UnboundLocalError from ole.close().
        ole = None
        try:
            ole = olefile.OleFileIO(data)
            ole_streams = ole.listdir(streams=True)
            self.event['total']['streams'] = len(ole_streams)
            for stream in ole_streams:
                # Separate local name: do not rebind the `file` parameter.
                stream_io = ole.openstream(stream)
                extract_data = stream_io.read()
                extract_name = re.sub(r'[\x00-\x1F]', '', '_'.join(stream))

                if extract_name.endswith('Ole10Native'):
                    native_stream = oletools.oleobj.OleNativeStream(
                        bindata=extract_data,
                    )
                    if native_stream.filename:
                        extract_name = f'{extract_name}_{native_stream.filename}'
                    else:
                        extract_name = f'{extract_name}_native_data'
                    extract_data = native_stream.data

                extract_file = strelka.File(
                    name=extract_name,
                    source=self.name,
                )

                for c in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1

        except OSError:
            self.flags.append('os_error')
        finally:
            if ole is not None:
                ole.close()
Example #4
0
    def scan(self, data, file, options, expire_at):
        """Extract the signer certificates of a PKCS#7 structure.

        The data is written to a temporary file so it can be loaded by
        path; every signer certificate is then uploaded in DER form as a
        file named ``sn_<serial number>``.

        Args:
            data: Raw PKCS#7 content.
            file: Not used by this method.
            options: Scanner options; ``tmp_directory`` (default '/tmp/')
                is where the temporary file is created.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        self.event['total'] = {'certificates': 0, 'extracted': 0}

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            # A leading b'0' byte selects the DER loader; anything else
            # goes through the default loader.
            starts_with_zero = data[:1] == b'0'
            load = SMIME.load_pkcs7_der if starts_with_zero else SMIME.load_pkcs7
            pkcs7 = load(tmp_data.name)

            signers = pkcs7.get0_signers(X509.X509_Stack())
            if not signers:
                return

            self.event['total']['certificates'] = len(signers)
            for signer in signers:
                extract_file = strelka.File(
                    name=f'sn_{signer.get_serial_number()}',
                    source=self.name,
                )

                for chunk in strelka.chunk_string(signer.as_der()):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        chunk,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1
Example #5
0
    def scan(self, data, file, options, expire_at):
        """Decompress LZMA data and upload the result as a new file.

        The decompressed length is stored in ``event['decompressed_size']``.
        An ``EOFError`` while reading adds the 'eof_error' flag; an
        ``lzma.LZMAError`` adds the 'lzma_error' flag.

        Args:
            data: Compressed content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        try:
            with io.BytesIO(data) as source, \
                    lzma.LZMAFile(filename=source) as reader:
                try:
                    payload = reader.read()
                    self.event['decompressed_size'] = len(payload)

                    extract_file = strelka.File(source=self.name)

                    for chunk in strelka.chunk_string(payload):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            chunk,
                            expire_at,
                        )

                    self.files.append(extract_file)

                except EOFError:
                    self.flags.append('eof_error')

        except lzma.LZMAError:
            self.flags.append('lzma_error')
Example #6
0
    def scan(self, data, file, options, expire_at):
        """Extract the non-directory members of a RAR archive.

        Members that need a password are not extracted; each one adds the
        'password_protected' flag. For every extracted member the host OS
        reported by the archive is stored in ``event['host_os']``.

        Args:
            data: Raw archive content.
            file: Not used by this method.
            options: Scanner options; ``limit`` (default 1000) caps the
                number of extracted members.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as rar_io, rarfile.RarFile(rar_io) as rar_obj:
            members = rar_obj.infolist()
            self.event['total']['files'] = len(members)

            for member in members:
                if member.isdir():
                    continue
                if self.event['total']['extracted'] >= file_limit:
                    break

                member_info = rar_obj.getinfo(member)
                if member_info.needs_password():
                    self.flags.append('password_protected')
                    continue

                self.event['host_os'] = HOST_OS_MAPPING[member_info.host_os]

                extract_file = strelka.File(
                    name=f'{member_info.filename}',
                    source=self.name,
                )

                for chunk in strelka.chunk_string(rar_obj.read(member)):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        chunk,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1
Example #7
0
    def scan(self, data, file, options, expire_at):
        """Unpack UPX-packed data with the ``upx`` command-line tool.

        The data is written to a temporary file and ``upx -d`` is asked to
        write the unpacked result next to it with an ``_upx`` suffix. The
        result is uploaded only when it is larger than the input. A
        non-zero exit status adds the flag ``return_code_<status>``.

        Args:
            data: Raw packed content.
            file: Not used by this method.
            options: Scanner options; ``tmp_directory`` (default '/tmp/')
                is where the temporary files are created.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            upx_path = f'{tmp_data.name}_upx'
            try:
                upx_return = subprocess.call(
                    ['upx', '-d', tmp_data.name, '-o', upx_path],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL)
                if upx_return != 0:
                    self.flags.append(f'return_code_{upx_return}')
                    return

                with open(upx_path, 'rb') as upx_fin:
                    upx_file = upx_fin.read()

                if len(upx_file) > len(data):
                    extract_file = strelka.File(source=self.name)
                    for c in strelka.chunk_string(upx_file):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )
                    self.files.append(extract_file)
            finally:
                # Always remove the unpacked output, even if reading or
                # uploading raised; it may not exist when upx failed.
                try:
                    os.remove(upx_path)
                except FileNotFoundError:
                    pass
Example #8
0
		def scan(self, data, file, options, expire_at):
				"""Recover a ZIP password with crack_zip and extract the members.

				If crack_zip returns nothing, the flag 'Could not extract
				password' is added and nothing is extracted. Otherwise every
				non-directory member is read with the recovered password and
				uploaded, up to the configured limit.

				Args:
						data: Raw archive content.
						file: Not used by this method.
						options: Scanner options read here: jtr_path,
								tmp_file_directory, limit, password_file, log_pws,
								scanner_timeout, brute_force, max_length.
						expire_at: Forwarded unchanged to upload_to_coordinator.
				"""
				jtr_path = options.get('jtr_path', '/jtr/')
				tmp_directory = options.get('tmp_file_directory', '/tmp/')
				file_limit = options.get('limit', 1000)
				password_file = options.get('password_file', '/etc/strelka/passwords.dat')
				log_extracted_pws = options.get('log_pws', False)
				scanner_timeout = options.get('scanner_timeout', 150)
				brute = options.get('brute_force', False)
				max_length = options.get('max_length', 5)

				self.event['total'] = {'files': 0, 'extracted': 0}

				with io.BytesIO(data) as zip_io:
						try:
								with zipfile.ZipFile(zip_io) as zip_obj:
										members = zip_obj.namelist()
										self.event['total']['files'] = len(members)

										extracted_pw = crack_zip(
												self,
												data,
												jtr_path,
												tmp_directory,
												brute=brute,
												scanner_timeout=scanner_timeout,
												max_length=max_length,
												password_file=password_file,
										)
										if not extracted_pw:
												self.flags.append('Could not extract password')
												return
										if log_extracted_pws:
												self.event['cracked_password'] = extracted_pw

										for member in members:
												# Names ending in '/' are directory entries.
												if member.endswith('/'):
														continue
												if self.event['total']['extracted'] >= file_limit:
														break

												try:
														extract_data = zip_obj.read(member, extracted_pw)
														if not extract_data:
																continue

														extract_file = strelka.File(
																name=member,
																source=self.name,
														)

														for chunk in strelka.chunk_string(extract_data):
																self.upload_to_coordinator(
																		extract_file.pointer,
																		chunk,
																		expire_at,
																)

														self.files.append(extract_file)
														self.event['total']['extracted'] += 1

												except NotImplementedError:
														self.flags.append('unsupported_compression')
												except RuntimeError:
														self.flags.append('runtime_error')
												except ValueError:
														self.flags.append('value_error')
												except zlib.error:
														self.flags.append('zlib_error')

						except zipfile.BadZipFile:
								self.flags.append('bad_zip')
Example #9
0
    def scan(self, data, file, options, expire_at):
        """Run ``antiword`` on the data and upload whatever it prints.

        The data is written to a temporary file that is passed to the
        ``antiword`` command; non-empty standard output is uploaded as a
        file named 'text'. Standard error is discarded.

        Args:
            data: Raw document content.
            file: Not used by this method.
            options: Scanner options; ``tmp_directory`` (default '/tmp/')
                is where the temporary file is created.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            process = subprocess.Popen(
                ['antiword', tmp_data.name],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL
            )
            text, _ = process.communicate()

            if not text:
                return

            extract_file = strelka.File(
                name='text',
                source=self.name,
            )

            for chunk in strelka.chunk_string(text):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )

            self.files.append(extract_file)
Example #10
0
    def scan(self, data, file, options, expire_at):
        """Base64-decode the data and upload the decoded bytes.

        The first 50 decoded bytes are stored in ``event['decoded_header']``.
        Data that raises ``binascii.Error`` adds the flag
        'not_decodable_from_base64'; an empty decode result is not uploaded.

        Args:
            data: Base64-encoded content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        with io.BytesIO(data) as encoded_file:
            try:
                decoded = base64.b64decode(encoded_file.read())
            except binascii.Error:
                self.flags.append('not_decodable_from_base64')
                return

            self.event['decoded_header'] = decoded[:50]
            if not decoded:
                return

            extract_file = strelka.File(
                source=self.name,
            )

            for chunk in strelka.chunk_string(decoded):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )

            self.files.append(extract_file)
Example #11
0
    def scan(self, data, file, options, expire_at):
        """Decompress a compressed SWF file into an uncompressed one.

        The three-byte magic selects the handling: 'CWS' bodies are
        inflated with zlib, 'ZWS' bodies with pylzma, and 'FWS' is only
        recorded. The rebuilt file is 'FWS' plus the five header bytes that
        follow the magic plus the decompressed body cut to the size read
        from the header minus eight.

        Args:
            data: Raw SWF content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_cache``.
        """
        def upload(payload):
            # Upload the rebuilt file in chunks and register it.
            extract_file = strelka.File(
                source=self.name,
            )
            for chunk in strelka.chunk_string(payload):
                self.upload_to_cache(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )
            self.files.append(extract_file)

        with io.BytesIO(data) as swf_io:
            # Bytes 4-7 hold the size as a little-endian signed int.
            swf_io.seek(4)
            (swf_size,) = struct.unpack('<i', swf_io.read(4))

            swf_io.seek(0)
            magic = swf_io.read(3)
            header = b'FWS' + swf_io.read(5)

            if magic == b'CWS':
                self.event['type'] = 'CWS'
                try:
                    body = zlib.decompress(swf_io.read())[:swf_size - 8]
                    upload(header + body)
                except zlib.error:
                    self.flags.append('zlib_error')

            elif magic == b'ZWS':
                self.event['type'] = 'ZWS'
                swf_io.seek(12)
                body = pylzma.decompress(swf_io.read())[:swf_size - 8]
                upload(header + body)

            elif magic == b'FWS':
                self.event['type'] = 'FWS'
Example #12
0
    def scan(self, data, file, options, expire_at):
        """Extract the non-directory members of a ZIP archive.

        Members whose encryption flag bit is set are skipped; the
        'encrypted' flag is added only when the first archive entry is such
        a member. Members that read as empty are not uploaded. Read errors
        are reported as flags rather than raised.

        Args:
            data: Raw archive content.
            file: Not used by this method.
            options: Scanner options; ``limit`` (default 1000) caps the
                number of extracted members.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as zip_io:
            try:
                with zipfile.ZipFile(zip_io) as zip_obj:
                    members = zip_obj.namelist()
                    self.event['total']['files'] = len(members)

                    for position, member in enumerate(members):
                        # Names ending in '/' are directory entries.
                        if member.endswith('/'):
                            continue
                        if self.event['total']['extracted'] >= file_limit:
                            break

                        try:
                            member_info = zip_obj.getinfo(member)
                            if member_info.flag_bits & 0x1:
                                if position == 0:
                                    self.flags.append('encrypted')
                                continue

                            extract_data = zip_obj.read(member)
                            if not extract_data:
                                continue

                            extract_file = strelka.File(
                                name=member,
                                source=self.name,
                            )

                            for chunk in strelka.chunk_string(extract_data):
                                self.upload_to_coordinator(
                                    extract_file.pointer,
                                    chunk,
                                    expire_at,
                                )

                            self.files.append(extract_file)
                            self.event['total']['extracted'] += 1

                        except NotImplementedError:
                            self.flags.append('unsupported_compression')
                        except RuntimeError:
                            self.flags.append('runtime_error')
                        except ValueError:
                            self.flags.append('value_error')
                        except zlib.error:
                            self.flags.append('zlib_error')

            except zipfile.BadZipFile:
                self.flags.append('bad_zip')
Example #13
0
    def scan(self, data, file, options, expire_at):
        """Extract VBA macros and optionally record macro analysis results.

        Every macro returned by the parser is uploaded as a file named
        after its VBA filename. When ``analyze_macros`` is enabled, the
        keywords reported by ``analyze_macros()`` are grouped into the
        event by type.

        Args:
            data: Raw document content.
            file: File object; its ``name`` is passed to the parser.
            options: Scanner options; ``analyze_macros`` (default True)
                toggles the keyword analysis.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        # Macro type reported by the analysis -> event key collecting it.
        event_keys = {
            'AutoExec': 'auto_exec',
            'Base64 String': 'base64',
            'Dridex String': 'dridex',
            'Hex String': 'hex',
            'IOC': 'ioc',
            'Suspicious': 'suspicious',
        }

        analyze_macros = options.get('analyze_macros', True)

        self.event['total'] = {'files': 0, 'extracted': 0}

        # Bound before the try so the finally clause never touches an
        # unassigned name when the parser constructor itself raises.
        vba = None
        try:
            vba = olevba3.VBA_Parser(filename=file.name, data=data)
            if vba.detect_vba_macros():
                extract_macros = list(vba.extract_macros())
                self.event['total']['files'] = len(extract_macros)
                for (filename, stream_path, vba_filename,
                     vba_code) in extract_macros:
                    extract_file = strelka.File(
                        name=f'{vba_filename}',
                        source=self.name,
                    )

                    for c in strelka.chunk_string(vba_code):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

                if analyze_macros:
                    for event_key in event_keys.values():
                        self.event.setdefault(event_key, [])

                    for (macro_type, keyword, description) in vba.analyze_macros():
                        event_key = event_keys.get(macro_type)
                        if event_key is not None:
                            self.event[event_key].append(keyword)

        except olevba3.FileOpenError:
            self.flags.append('file_open_error')
        finally:
            if vba is not None:
                vba.close()
Example #14
0
    def scan(self, data, file, options, expire_at):
        """Decrypt an Office document by trying a list of known passwords.

        Passwords are loaded once from ``password_file`` into
        ``self.passwords`` (one per line, kept as bytes). If the document is
        encrypted, each password is tried until one decrypts it; the
        decrypted document is uploaded and the password stored in
        ``event['password']``. Otherwise 'no_password_match_found' is
        flagged.

        Args:
            data: Raw document content.
            file: Not used by this method.
            options: Scanner options; ``password_file`` defaults to
                '/etc/strelka/passwords.dat'.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        password_file = options.get('password_file', '/etc/strelka/passwords.dat')

        if not self.passwords and os.path.isfile(password_file):
            with open(password_file, 'rb') as handle:
                for raw_line in handle:
                    self.passwords.append(raw_line.strip())

        with io.BytesIO(data) as doc_io:

            msoff_doc = msoffcrypto.OfficeFile(doc_io)
            output_doc = io.BytesIO()
            password = ''
            extract_data = b''

            if msoff_doc.is_encrypted():
                self.flags.append('password_protected')

                for candidate in self.passwords:
                    try:
                        msoff_doc.load_key(password=candidate.decode('utf-8'))
                        output_doc.seek(0)
                        msoff_doc.decrypt(output_doc)
                        output_doc.seek(0)

                        if output_doc.readable():
                            extract_data = output_doc.read()
                            password = candidate.decode('utf-8')
                            break

                    except Exception:
                        # Any failure counts as a non-matching password;
                        # move on to the next candidate.
                        continue

            if not password:
                self.flags.append('no_password_match_found')
                return

            self.event['password'] = password

            extract_file = strelka.File(
                source=self.name,
            )

            for chunk in strelka.chunk_string(extract_data):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )

            self.files.append(extract_file)
Example #15
0
    def scan(self, data, file, options, expire_at):
        """Base64-decode the data and upload the result as a new file.

        Decoding errors are not handled here and propagate to the caller.

        Args:
            data: Base64-encoded content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        payload = base64.b64decode(data)
        extract_file = strelka.File(source=self.name)

        for chunk in strelka.chunk_string(payload):
            self.upload_to_coordinator(extract_file.pointer, chunk, expire_at)

        self.files.append(extract_file)
Example #16
0
    def scan(self, data, file, options, expire_at):
        """Parse an email message, record headers, and extract its parts.

        Headers are stored in ``event['headers']`` as header/value dicts;
        when the ``headers`` option is non-empty only the listed header
        names are kept. Every part with a decodable payload is uploaded,
        named after its filename or ``part_<index>`` when it has none, and
        tagged with its content type as an external flavor.

        Args:
            data: Raw message bytes, decoded as UTF-8 with replacement.
            file: Not used by this method.
            options: Scanner options; ``headers`` (default empty list)
                restricts which headers are recorded.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        headers = options.get('headers', [])

        self.event['total'] = {'parts': 0, 'extracted': 0}

        try:
            text = data.decode('UTF-8', 'replace')
            message = email.message_from_string(text)

            self.event['headers'] = []
            for header_name, header_value in message.items():
                if not headers or header_name in headers:
                    self.event['headers'].append({
                        'header': header_name,
                        'value': header_value,
                    })

            self.event['parts'] = []
            for index, part in enumerate(message.walk()):
                self.event['total']['parts'] += 1

                extract_data = part.get_payload(decode=True)
                if extract_data is None:
                    continue

                part_filename = part.get_filename()
                if part_filename is None:
                    extract_name = f'part_{index}'
                else:
                    extract_name = f'{part_filename}'
                    self.event['parts'].append(part_filename)

                extract_file = strelka.File(
                    name=extract_name,
                    source=self.name,
                )
                extract_file.add_flavors(
                    {'external': [part.get_content_type()]})

                for chunk in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        chunk,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1

        except AssertionError:
            self.flags.append('assertion_error')
Example #17
0
    def _recurse_node(self, node, xml_args):
        """Recursively parses XML file.

        The XML file is recursively parsed down every node tree. For each
        node with a string-like tag, the tag and namespace are recorded in
        the event; the node's ``name`` attribute (or its text when there is
        no such attribute) is stored as tag data or uploaded as a file,
        depending on which list in ``xml_args`` contains the tag.

        Args:
            node: node to be recursively parsed.
            xml_args: options set by the scanner that affect XML parsing;
                the keys read here are 'metadata_tags' and 'extract_tags'.
        """
        if node is None:
            return

        # Only nodes whose tag supports indexing (string-like) are recorded.
        if hasattr(node.tag, '__getitem__'):
            if node.tag.startswith('{'):
                # Tags of the form '{namespace}tag' carry their namespace.
                namespace, separator, tag = node.tag[1:].partition('}')
            else:
                namespace = None
                tag = node.tag

            self.event['total']['tags'] += 1
            if namespace not in self.event['namespaces']:
                self.event['namespaces'].append(namespace)
            if tag not in self.event['tags']:
                self.event['tags'].append(tag)

            text = node.attrib.get('name', node.text)
            if text is not None:
                if tag in xml_args['metadata_tags']:
                    tag_data = {'tag': tag, 'text': text.strip()}
                    if tag_data not in self.event['tag_data']:
                        self.event['tag_data'].append(tag_data)
                elif tag in xml_args['extract_tags']:
                    extract_file = strelka.File(
                        name=tag,
                        source=self.name,
                    )

                    for c in strelka.chunk_string(text):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            self.expire_at,
                        )

                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

        # Iterate the element directly instead of calling getchildren(),
        # which xml.etree.ElementTree no longer provides. The recursive
        # call goes through the class with `self` passed explicitly, so the
        # arguments line up whether this function is bound as a plain
        # method or wrapped as a staticmethod.
        for child in node:
            type(self)._recurse_node(self, child, xml_args)
Example #18
0
    def scan(self, data, file, options, expire_at):
        """Decompress gzip data and upload the result as a new file.

        The decompressed length is stored in ``event['size']``. Errors
        raised while decompressing are not handled here.

        Args:
            data: Gzip-compressed content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        with io.BytesIO(data) as source, \
                gzip.GzipFile(fileobj=source) as reader:
            payload = reader.read()
            self.event['size'] = len(payload)

            extract_file = strelka.File(source=self.name)

            for chunk in strelka.chunk_string(payload):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )

            self.files.append(extract_file)
Example #19
0
    def scan(self, data, file, options, expire_at):
        """Inflate zlib data and upload the result as a new file.

        The decompressed length is stored in ``event['size']``. Errors
        raised by ``zlib.decompress`` are not handled here.

        Args:
            data: Zlib-compressed content.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        payload = zlib.decompress(data)
        self.event["size"] = len(payload)

        extract_file = strelka.File(source=self.name)

        for chunk in strelka.chunk_string(payload):
            self.upload_to_coordinator(extract_file.pointer, chunk, expire_at)

        self.files.append(extract_file)
Example #20
0
    def scan(self, data, file, options, expire_at):

        jtr_path = options.get('jtr_path', '/jtr/')
        tmp_directory = options.get('tmp_file_directory', '/tmp/')
        password_file = options.get('password_file',
                                    '/etc/strelka/passwords.dat')
        log_extracted_pws = options.get('log_pws', False)
        scanner_timeout = options.get('scanner_timeout', 150)
        brute = options.get('brute_force', False)
        max_length = options.get('max_length', 5)

        with io.BytesIO(data) as doc_io:

            msoff_doc = msoffcrypto.OfficeFile(doc_io)
            output_doc = io.BytesIO()
            if extracted_pw := crack_word(self,
                                          data,
                                          jtr_path,
                                          tmp_directory,
                                          brute=brute,
                                          scanner_timeout=scanner_timeout,
                                          max_length=max_length,
                                          password_file=password_file):
                if log_extracted_pws:
                    self.event['cracked_password'] = extracted_pw
                try:
                    msoff_doc.load_key(password=extracted_pw.decode('utf-8'))
                    msoff_doc.decrypt(output_doc)
                    output_doc.seek(0)
                    extract_data = output_doc.read()
                    output_doc.seek(0)
                    extract_file = strelka.File(source=self.name, )

                    for c in strelka.chunk_string(extract_data):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)
                except:
                    self.flags.append(
                        'Could not decrypt document with recovered password')

            else:
Example #21
0
    def scan(self, data, file, options, expire_at):
        """Parse an email message, record headers, and extract its parts.

        Each header is stored once in ``event['headers']`` with its value
        stripped and passed through ``strelka.normalize_whitespace``. Every
        part with a decodable payload is uploaded, named after its filename
        or ``part_<index>`` when it has none, and tagged with its content
        type as an external flavor.

        Args:
            data: Raw message bytes, decoded as UTF-8 with replacement.
            file: Not used by this method.
            options: Not used by this method.
            expire_at: Forwarded unchanged to ``upload_to_coordinator``.
        """
        self.event['total'] = {'parts': 0, 'extracted': 0}

        try:
            text = data.decode('UTF-8', 'replace')
            message = email.message_from_string(text)

            self.event.setdefault('headers', [])
            for header_name, header_value in message.items():
                entry = {
                    'header': header_name,
                    'value': strelka.normalize_whitespace(header_value.strip()),
                }
                # Skip exact duplicates of an already recorded header.
                if entry in self.event['headers']:
                    continue
                self.event['headers'].append(entry)

            self.event.setdefault('parts', [])
            for index, part in enumerate(message.walk()):
                self.event['total']['parts'] += 1

                extract_data = part.get_payload(decode=True)
                if extract_data is None:
                    continue

                part_filename = part.get_filename()
                if part_filename is None:
                    extract_name = f'part_{index}'
                else:
                    extract_name = f'{part_filename}'
                    self.event['parts'].append(part_filename)

                extract_file = strelka.File(
                    name=extract_name,
                    source=self.name,
                )
                extract_file.add_flavors(
                    {'external': [part.get_content_type()]})

                for chunk in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        chunk,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1

        except AssertionError:
            self.flags.append('assertion_error')
Example #22
0
    def scan(self, data, file, options, expire_at):
        """Extract bytes that follow the declared end of a BMP image.

        Bytes 2-5 of ``data`` are read as a little-endian integer and taken
        as the declared file size; anything stored past that offset is
        treated as a trailer.

        Args:
            data: Raw file bytes.
            file: Unused here.
            options: Unused here.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        When a trailer exists, ``self.event['trailer_index']`` holds the
        declared size, ``self.event['BMP_EOF']`` holds the trailer bytes and
        one extracted file is appended to ``self.files``. Otherwise the
        ``'no_trailer'`` flag is appended.
        """
        expected_size = int.from_bytes(data[2:6], "little")
        trailer_bytes_data = data[expected_size:]

        # A declared size equal to OR larger than the real length (truncated
        # file) leaves nothing after the image, so there is no trailer to
        # report or extract.
        if not trailer_bytes_data:
            self.flags.append('no_trailer')
            return

        self.event['trailer_index'] = expected_size
        extract_file = strelka.File(
            source=self.name,
        )

        for c in strelka.chunk_string(trailer_bytes_data):
            self.upload_to_coordinator(
                extract_file.pointer,
                c,
                expire_at,
            )
        self.event['BMP_EOF'] = trailer_bytes_data
        self.files.append(extract_file)
Example #23
0
    def scan(self, data, file, options, expire_at):
        """Extract bytes that follow the last ``FF D9`` marker in ``data``.

        Args:
            data: Raw file bytes.
            file: Unused here.
            options: Unused here.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        Does nothing when ``data`` already ends with the marker. Appends the
        ``'no_trailer'`` flag when the marker is absent altogether. Otherwise
        stores the marker offset in ``self.event['trailer_index']`` and
        appends one extracted file holding the bytes after the marker.
        """
        end_marker = b'\xff\xd9'
        if data.endswith(end_marker):
            return

        marker_at = data.rfind(end_marker)
        if marker_at == -1:
            self.flags.append('no_trailer')
            return

        appended = data[marker_at + 2:]
        if not appended:
            return

        self.event['trailer_index'] = marker_at
        extract_file = strelka.File(source=self.name)

        for piece in strelka.chunk_string(appended):
            self.upload_to_coordinator(
                extract_file.pointer,
                piece,
                expire_at,
            )

        self.files.append(extract_file)
Example #24
0
    def scan(self, data, file, options, expire_at):
        """Decompress bzip2 ``data`` and extract the result as one file.

        Args:
            data: Raw bzip2 bytes.
            file: Unused here.
            options: Unused here.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        On success ``self.event['size']`` holds the decompressed length and
        one extracted file is appended to ``self.files``. ``EOFError`` and
        ``OSError`` raised while reading or uploading are recorded as the
        ``'eof_error'`` and ``'os_error'`` flags respectively.
        """
        with io.BytesIO(data) as raw_stream, \
                bz2.BZ2File(filename=raw_stream) as bz_stream:
            try:
                payload = bz_stream.read()
                self.event['size'] = len(payload)

                extract_file = strelka.File(source=self.name)

                for piece in strelka.chunk_string(payload):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        piece,
                        expire_at,
                    )

                self.files.append(extract_file)

            except EOFError:
                self.flags.append('eof_error')
            except OSError:
                self.flags.append('os_error')
Example #25
0
    def scan(self, data, file, options, expire_at):
        """Extract the regular files stored in a tar archive.

        Args:
            data: Raw archive bytes.
            file: Unused here.
            options: Mapping; ``limit`` (default 1000) caps how many members
                are extracted.
            expire_at: Passed through to ``upload_to_cache`` for each
                uploaded chunk.

        ``self.event['total']`` records the number of archive members
        (``files``) and how many were extracted (``extracted``). A
        ``KeyError`` raised while extracting a member is recorded as the
        ``'key_error'`` flag; an unreadable archive is recorded as
        ``'tarfile_read_error'``.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as tar_io:
            try:
                with tarfile.open(fileobj=tar_io) as tar_obj:
                    tar_members = tar_obj.getmembers()
                    self.event['total']['files'] = len(tar_members)
                    for tar_member in tar_members:
                        # isfile is a method: without the call the bound
                        # method object is always truthy and every member
                        # (directories, links, devices) would pass.
                        if not tar_member.isfile():
                            continue

                        if self.event['total']['extracted'] >= file_limit:
                            break

                        try:
                            tar_file = tar_obj.extractfile(tar_member)
                            if tar_file is not None:
                                extract_file = strelka.File(
                                    name=tar_member.name,
                                    source=self.name,
                                )

                                for c in strelka.chunk_string(
                                        tar_file.read()):
                                    self.upload_to_cache(
                                        extract_file.pointer,
                                        c,
                                        expire_at,
                                    )

                                self.files.append(extract_file)
                                self.event['total']['extracted'] += 1

                        except KeyError:
                            self.flags.append('key_error')

            except tarfile.ReadError:
                self.flags.append('tarfile_read_error')
Example #26
0
    def scan(self, data, file, options, expire_at):
        """Extract each certificate contained in a PKCS7 blob.

        Args:
            data: Raw PKCS7 bytes. A leading ``b'0'`` byte selects ASN.1
                (DER) loading; anything else is loaded as PEM.
            file: Unused here.
            options: Unused here.
            expire_at: Passed through to ``upload_to_cache`` for each
                uploaded chunk.

        ``self.event`` receives ``cryptoType`` (``'der'`` or ``'pem'``) and
        ``total`` (``certificates``/``extracted`` counters). Each certificate
        is dumped in the same encoding it was loaded with and extracted under
        the name ``sn_<serial number>``. ``crypto.Error`` is recorded as the
        ``'load_pkcs7_error'`` flag.
        """
        counters = {'certificates': 0, 'extracted': 0}
        self.event['total'] = counters

        looks_like_der = data[:1] == b'0'
        if looks_like_der:
            crypto_file_type = crypto.FILETYPE_ASN1
        else:
            crypto_file_type = crypto.FILETYPE_PEM
        self.event['cryptoType'] = 'der' if looks_like_der else 'pem'

        try:
            container = crypto.load_pkcs7_data(crypto_file_type, data)
            certificates = container.get_certificates()
            if certificates is None:
                return

            counters['certificates'] = len(certificates)
            for cert in certificates:
                extract_file = strelka.File(
                    name=f'sn_{cert.get_serial_number()}',
                    source=self.name,
                )
                encoded = crypto.dump_certificate(crypto_file_type, cert)

                for piece in strelka.chunk_string(encoded):
                    self.upload_to_cache(
                        extract_file.pointer,
                        piece,
                        expire_at,
                    )

                self.files.append(extract_file)
                counters['extracted'] += 1

        except crypto.Error:
            self.flags.append('load_pkcs7_error')
Example #27
0
    def scan(self, data, file, options, expire_at):
        """Run the ``tesseract`` command on ``data`` and record the text.

        ``data`` is written to a temporary file and handed to ``tesseract``
        together with a second temporary path used as the output base; the
        result is read back from ``<output base>.txt``.

        Args:
            data: Raw image bytes.
            file: Unused here.
            options: Mapping; ``extract_text`` (default ``False``) also
                extracts the text as a file named ``text``,
                ``tmp_directory`` (default ``'/tmp/'``) is where the
                temporary files are created.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        On a zero exit status with non-empty output, ``self.event['text']``
        holds the whitespace-split output. A non-zero exit status is recorded
        as the flag ``return_code_<status>``.
        """
        extract_text = options.get('extract_text', False)
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_tess:
                tess_txt_name = f'{tmp_tess.name}.txt'
                try:
                    tess_return = subprocess.call(
                        ['tesseract', tmp_data.name, tmp_tess.name],
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL,
                    )
                    if tess_return == 0:
                        with open(tess_txt_name, 'rb') as tess_txt:
                            ocr_file = tess_txt.read()

                        if ocr_file:
                            self.event['text'] = ocr_file.split()
                            if extract_text:
                                extract_file = strelka.File(
                                    name='text',
                                    source=self.name,
                                )

                                for c in strelka.chunk_string(ocr_file):
                                    self.upload_to_coordinator(
                                        extract_file.pointer,
                                        c,
                                        expire_at,
                                    )

                                self.files.append(extract_file)
                    else:
                        # f-string so the actual exit status ends up in the
                        # flag instead of the literal placeholder text.
                        self.flags.append(f'return_code_{tess_return}')
                finally:
                    # Always clean up the output file, and tolerate it never
                    # having been created (e.g. when tesseract failed).
                    try:
                        os.remove(tess_txt_name)
                    except FileNotFoundError:
                        pass
Example #28
0
    def scan(self, data, file, options, expire_at):
        """Summarise an HTML document and extract its inline scripts.

        Args:
            data: Raw HTML handed to BeautifulSoup.
            file: Unused here.
            options: Mapping; ``parser`` (default ``'html.parser'``) selects
                the BeautifulSoup parser.
            expire_at: Passed through to ``upload_to_cache`` for each
                uploaded chunk.

        ``self.event`` receives ``total`` (per-tag counters plus
        ``extracted``), an optional whitespace-normalised ``title``, and
        de-duplicated lists ``hyperlinks``, ``forms``, ``frames``,
        ``inputs``, ``scripts`` and ``spans``. Every ``<script>`` with text
        is extracted as ``script_<index>`` with its lower-cased ``language``
        and ``type`` attributes as external flavors. ``TypeError`` is
        recorded as the ``'type_error'`` flag.
        """
        def add_unique(bucket, entry):
            # Keep only the first occurrence, preserving document order.
            if entry not in bucket:
                bucket.append(entry)

        parser = options.get('parser', 'html.parser')

        totals = {
            'scripts': 0,
            'forms': 0,
            'inputs': 0,
            'frames': 0,
            'extracted': 0,
        }
        self.event['total'] = totals

        try:
            soup = bs4.BeautifulSoup(data, parser)

            if soup.title:
                self.event['title'] = strelka.normalize_whitespace(
                    soup.title.text)

            # Anchors first, then images, matching on href before src.
            linked_tags = list(soup.find_all('a', href=True))
            linked_tags += soup.find_all('img', src=True)
            seen_links = self.event.setdefault('hyperlinks', [])
            for tag in linked_tags:
                add_unique(seen_links, tag.get('href') or tag.get('src'))

            form_tags = soup.find_all('form')
            totals['forms'] = len(form_tags)
            seen_forms = self.event.setdefault('forms', [])
            for tag in form_tags:
                add_unique(seen_forms, {
                    'action': tag.get('action'),
                    'method': tag.get('method'),
                })

            frame_tags = list(soup.find_all('frame'))
            frame_tags += soup.find_all('iframe')
            totals['frames'] = len(frame_tags)
            seen_frames = self.event.setdefault('frames', [])
            frame_attrs = (
                'src', 'name', 'height', 'width', 'border', 'id', 'style',
            )
            for tag in frame_tags:
                add_unique(
                    seen_frames,
                    {attr: tag.get(attr) for attr in frame_attrs},
                )

            input_tags = soup.find_all('input')
            totals['inputs'] = len(input_tags)
            seen_inputs = self.event.setdefault('inputs', [])
            for tag in input_tags:
                add_unique(seen_inputs, {
                    'type': tag.get('type'),
                    'name': tag.get('name'),
                    'value': tag.get('value'),
                })

            script_tags = soup.find_all('script')
            totals['scripts'] = len(script_tags)
            seen_scripts = self.event.setdefault('scripts', [])
            for position, tag in enumerate(script_tags):
                flavors = [
                    tag.get('language', '').lower(),
                    tag.get('type', '').lower(),
                ]
                add_unique(seen_scripts, {
                    'src': tag.get('src'),
                    'language': tag.get('language'),
                    'type': tag.get('type'),
                })

                if not tag.text:
                    continue

                extract_file = strelka.File(
                    name=f'script_{position}',
                    source=self.name,
                )
                extract_file.add_flavors({'external': flavors})

                for piece in strelka.chunk_string(tag.text):
                    self.upload_to_cache(
                        extract_file.pointer,
                        piece,
                        expire_at,
                    )

                self.files.append(extract_file)
                totals['extracted'] += 1

            span_tags = soup.find_all('span')
            totals['spans'] = len(span_tags)
            seen_spans = self.event.setdefault('spans', [])
            for tag in span_tags:
                add_unique(seen_spans, {
                    'class': tag.get('class'),
                    'style': tag.get('style'),
                })

        except TypeError:
            self.flags.append('type_error')
Example #29
0
    def scan(self, data, file, options, expire_at):
        """Record RPM header fields and extract the package payload.

        ``data`` is written to a temporary file so ``rpmfile`` can open it by
        name. Recognised header keys are copied into ``self.event`` (some
        under a renamed key), and the bytes of ``data`` from
        ``rpm_obj.data_offset`` onward are extracted as one file named after
        the decoded ``name`` header (empty string when absent).

        Args:
            data: Raw RPM bytes.
            file: Unused here.
            options: Mapping; ``tmp_directory`` (default ``'/tmp/'``) is
                where the temporary file is created.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        ``ValueError`` is recorded as the ``'value_error'`` flag.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        # Header key -> event key for fields that are copied verbatim.
        copied_fields = {
            'arch': 'architecture',
            'archive_compression': 'archive_compression',
            'archive_format': 'archive_format',
            'authors': 'authors',
            'buildhost': 'build_host',
            'buildtime': 'build_time',
            'copyright': 'copyright',
            'filenames': 'filenames',
            'group': 'group',
            'os': 'os',
            'packager': 'packager',
            'provides': 'provides',
            'release': 'release',
            'requirename': 'require_name',
            'rpmversion': 'rpm_version',
            'serial': 'serial',
            'sourcerpm': 'source_rpm',
            'summary': 'summary',
            'vendor': 'vendor',
            'version': 'version',
            'url': 'url',
        }

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            try:
                with rpmfile.open(tmp_data.name) as rpm_obj:
                    extract_name = ''
                    for header, value in rpm_obj.headers.items():
                        if header == 'description':
                            # Flatten multi-line descriptions; skip if unset.
                            if value is not None:
                                self.event['description'] = value.replace(
                                    b'\n', b' ')
                        elif header == 'name':
                            self.event['name'] = value
                            extract_name = f'{value.decode()}'
                        elif header in copied_fields:
                            self.event[copied_fields[header]] = value

                    extract_file = strelka.File(
                        name=extract_name,
                        source=self.name,
                    )

                    payload = data[rpm_obj.data_offset:]
                    for piece in strelka.chunk_string(payload):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            piece,
                            expire_at,
                        )

                    self.files.append(extract_file)

            except ValueError:
                self.flags.append('value_error')
Example #30
0
    def scan(self, data, file, options, expire_at):
        """Flag risky PDF objects and extract streams, links and text.

        Args:
            data: Raw PDF bytes, opened with PyMuPDF (``fitz``).
            file: Unused here.
            options: Mapping; ``extract_text`` (default ``False``) also
                extracts the concatenated page text as a file named
                ``text``, ``limit`` (default 2000) caps how many stream
                objects are extracted.
            expire_at: Passed through to ``upload_to_coordinator`` for each
                uploaded chunk.

        For every xref from 1 up to the document's xref length, the object
        source is searched for ``/AA``/``/OpenAction`` (flag
        ``auto_action``) and ``/JS``/``/JavaScript`` (flag
        ``javascript_embedded``), and stream objects are extracted as
        ``object_<xref>``. Link URIs from each page are collected in
        ``self.event['annotated_uris']``. Failures are recorded as the flags
        ``stream_read_exception``, ``page_parsing_failure`` and
        ``pdf_load_error``.
        """
        extract_text = options.get("extract_text", False)
        file_limit = options.get("limit", 2000)

        # NOTE(review): 'objects' is initialised here but never updated in
        # this method.
        self.event["total"] = {"objects": 0, "extracted": 0}
        extracted_objects = set()

        try:
            with io.BytesIO(data) as pdf_io:

                # Open file as with PyMuPDF as file object
                reader = fitz.open(stream=pdf_io, filetype="pdf")

                # Get length of xrefs to be used in xref / annotation iteration
                xreflen = reader.xref_length()

                # Iterate through xrefs and collect annotations
                for xref in range(1, xreflen):

                    # PDF Annotation Flags
                    # Inspect the same object that is tested for a stream
                    # below (previously a separate counter lagged one behind).
                    xref_object = reader.xref_object(xref, compressed=False)
                    if any(obj in xref_object
                           for obj in ["/AA", "/OpenAction"]):
                        self.flags.append("auto_action")
                    if any(obj in xref_object
                           for obj in ["/JS", "/JavaScript"]):
                        self.flags.append("javascript_embedded")

                    # PDF Object Resubmission
                    # If xref is a stream, add that object back into the analysis pipeline
                    if reader.is_stream(xref):
                        try:
                            # Honour the configured extraction limit while
                            # still scanning the remaining objects for flags.
                            under_limit = (
                                self.event["total"]["extracted"] < file_limit
                            )
                            if under_limit and xref not in extracted_objects:
                                extract_file = strelka.File(
                                    name=f"object_{xref}",
                                    source=self.name,
                                )

                                for c in strelka.chunk_string(
                                        reader.xref_stream(xref)):
                                    self.upload_to_coordinator(
                                        extract_file.pointer,
                                        c,
                                        expire_at,
                                    )

                                self.files.append(extract_file)
                                self.event["total"]["extracted"] += 1
                                extracted_objects.add(xref)

                        except Exception:
                            self.flags.append("stream_read_exception")

                # Iterate through pages and collect links and text
                page_texts = []

                try:
                    for page in reader:

                        # PDF Link Extraction
                        self.event.setdefault("annotated_uris", [])
                        links = page.get_links()
                        if links:
                            for link in links:
                                if "uri" in link:
                                    self.event["annotated_uris"].append(
                                        link["uri"])
                        if extract_text:
                            # snake_case name, matching get_links() above.
                            page_texts.append(page.get_text())

                    # PDF Text Extraction
                    # Caution: Will increase time and object storage size
                    if extract_text:
                        extract_file = strelka.File(
                            name="text",
                            source=self.name,
                        )
                        for c in strelka.chunk_string("".join(page_texts)):
                            self.upload_to_coordinator(
                                extract_file.pointer,
                                c,
                                expire_at,
                            )
                        self.files.append(extract_file)
                        self.flags.append("extracted_text")
                except Exception:
                    self.flags.append("page_parsing_failure")
        except Exception:
            self.flags.append("pdf_load_error")