Esempio n. 1
0
    def scan(self, data, file, options, expire_at):
        """Parse a TNEF container and extract its attachments and HTML body.

        Records the descriptive name of every TNEF object, copies the
        subject, message ID and message class into the event when present,
        and uploads each attachment (plus the HTML body, if any) as a child
        file.

        Args:
            data: Raw TNEF content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        self.event['total'] = {'attachments': 0, 'extracted': 0}
        self.event.setdefault('object_names', [])

        # Descriptive object names whose payload is copied into the event.
        event_keys = {
            'Subject': 'subject',
            'Message ID': 'message_id',
            'Message Class': 'message_class',
        }

        tnef = tnefparse.TNEF(data)
        for tnef_object in getattr(tnef, 'objects', []):
            descriptive_name = tnefparse.TNEF.codes.get(tnef_object.name)
            if descriptive_name not in self.event['object_names']:
                self.event['object_names'].append(descriptive_name)

            try:
                # Drop NUL padding; an all-NUL/empty payload counts as absent.
                object_data = tnef_object.data.strip(b'\0') or None
            except (AttributeError, TypeError):
                # Payload cannot be stripped with a bytes argument; keep it
                # unchanged instead of swallowing every exception.
                object_data = tnef_object.data

            if object_data is not None and descriptive_name in event_keys:
                self.event[event_keys[descriptive_name]] = object_data

        tnef_attachments = getattr(tnef, 'attachments', [])
        self.event['total']['attachments'] = len(tnef_attachments)
        for attachment in tnef_attachments:
            extract_file = strelka.File(
                name=attachment.name.decode(),
                source=self.name,
            )

            for c in strelka.chunk_string(attachment.data):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
            self.event['total']['extracted'] += 1

        # The HTML body is uploaded as a child file but, as before, is not
        # counted in total.extracted.
        tnef_html = getattr(tnef, 'htmlbody', None)
        if tnef_html is not None:
            extract_file = strelka.File(
                name='htmlbody',
                source=self.name,
            )

            for c in strelka.chunk_string(tnef_html):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
Esempio n. 2
0
    def scan(self, data, file, options, expire_at):
        """Extract the objects embedded in an RTF document.

        Every object found by the RTF object parser is uploaded as a child
        file, up to the configured limit. Packages keep their embedded
        filename; other objects are named after their position in the
        parser's object list.

        Args:
            data: Raw RTF content.
            file: File object being scanned (not used here).
            options: Scanner options; 'limit' caps the number of extracted
                objects (default 1000).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'objects': 0, 'extracted': 0}

        rtf = rtfobj.RtfObjParser(data)
        rtf.parse()
        self.event['total']['objects'] = len(rtf.objects)

        # enumerate() yields each object's position in rtf.objects directly.
        # The previous code looked the index up through `rtf.server`, which
        # is not the list being iterated here.
        for index, rtf_object in enumerate(rtf.objects):
            if self.event['total']['extracted'] >= file_limit:
                break

            if rtf_object.is_package:
                extract_name = rtf_object.filename
                extract_data = rtf_object.olepkgdata
            elif rtf_object.is_ole:
                extract_name = f'object_{index}'
                extract_data = rtf_object.oledata
            else:
                extract_name = f'object_{index}'
                extract_data = rtf_object.rawdata

            extract_file = strelka.File(
                name=extract_name,
                source=self.name,
            )

            for c in strelka.chunk_string(extract_data):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    c,
                    expire_at,
                )

            self.files.append(extract_file)
            self.event['total']['extracted'] += 1
Esempio n. 3
0
    def scan(self, data, file, options, expire_at):
        """Extract every stream from an OLE compound file.

        Each stream is uploaded as a child file named after its path joined
        with underscores (control characters removed). Streams whose name
        ends with 'Ole10Native' are unwrapped first, so the embedded native
        payload is uploaded instead of the raw stream.

        Args:
            data: Raw OLE content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        self.event['total'] = {'streams': 0, 'extracted': 0}

        # Bound before the try block so the finally clause never touches an
        # unassigned name when OleFileIO() itself raises.
        ole = None
        try:
            ole = olefile.OleFileIO(data)
            ole_streams = ole.listdir(streams=True)
            self.event['total']['streams'] = len(ole_streams)
            for stream in ole_streams:
                stream_data = ole.openstream(stream).read()
                extract_name = re.sub(r'[\x00-\x1F]', '', '_'.join(stream))

                if extract_name.endswith('Ole10Native'):
                    native_stream = oletools.oleobj.OleNativeStream(
                        bindata=stream_data,
                    )
                    if native_stream.filename:
                        extract_name = f'{extract_name}_{native_stream.filename}'
                    else:
                        extract_name = extract_name + '_native_data'
                    extract_data = native_stream.data
                else:
                    extract_data = stream_data

                extract_file = strelka.File(
                    name=extract_name,
                    source=self.name,
                )

                for c in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1

        except OSError:
            self.flags.append('os_error')
        finally:
            if ole is not None:
                ole.close()
Esempio n. 4
0
		def scan(self, data, file, options, expire_at):
				"""Recover the password of a ZIP archive and extract its members.

				The password is obtained from crack_zip(); when none is found the
				scan stops after flagging it. Otherwise each non-directory member
				is read with the recovered password and uploaded, up to 'limit'.

				Args:
						data: Raw ZIP content.
						file: File object being scanned (not used here).
						options: Scanner options (jtr_path, tmp_file_directory, limit,
								password_file, log_pws, scanner_timeout, brute_force,
								max_length).
						expire_at: Expiration value forwarded to upload_to_coordinator.
				"""
				jtr_path = options.get('jtr_path', '/jtr/')
				tmp_directory = options.get('tmp_file_directory', '/tmp/')
				file_limit = options.get('limit', 1000)
				password_file = options.get('password_file', '/etc/strelka/passwords.dat')
				log_extracted_pws = options.get('log_pws', False)
				scanner_timeout = options.get('scanner_timeout', 150)
				brute = options.get('brute_force', False)
				max_length = options.get('max_length', 5)

				self.event['total'] = {'files': 0, 'extracted': 0}

				with io.BytesIO(data) as zip_io:
						try:
								with zipfile.ZipFile(zip_io) as zip_obj:
										members = zip_obj.namelist()
										self.event['total']['files'] = len(members)

										extracted_pw = crack_zip(
												self,
												data,
												jtr_path,
												tmp_directory,
												brute=brute,
												scanner_timeout=scanner_timeout,
												max_length=max_length,
												password_file=password_file,
										)
										if not extracted_pw:
												self.flags.append('Could not extract password')
												return
										if log_extracted_pws:
												self.event['cracked_password'] = extracted_pw

										for member in members:
												# Directory entries carry no data.
												if member.endswith('/'):
														continue
												if self.event['total']['extracted'] >= file_limit:
														break

												try:
														extract_data = zip_obj.read(member, extracted_pw)
														if not extract_data:
																continue

														extract_file = strelka.File(name=member, source=self.name)
														for c in strelka.chunk_string(extract_data):
																self.upload_to_coordinator(
																		extract_file.pointer,
																		c,
																		expire_at,
																)

														self.files.append(extract_file)
														self.event['total']['extracted'] += 1

												except NotImplementedError:
														self.flags.append('unsupported_compression')
												except RuntimeError:
														self.flags.append('runtime_error')
												except ValueError:
														self.flags.append('value_error')
												except zlib.error:
														self.flags.append('zlib_error')

						except zipfile.BadZipFile:
								self.flags.append('bad_zip')
Esempio n. 5
0
    def scan(self, data, file, options, expire_at):
        """Run antiword on the document and upload the text it prints.

        The data is written to a temporary file inside 'tmp_directory'
        (default '/tmp/') so antiword can read it. Nothing is uploaded when
        antiword writes nothing to stdout.

        Args:
            data: Raw document content.
            file: File object being scanned (not used here).
            options: Scanner options; 'tmp_directory' selects where the
                temporary file is created.
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            # stderr is discarded; only the extracted text is of interest.
            completed = subprocess.run(
                ['antiword', tmp_data.name],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
            )
            text = completed.stdout
            if not text:
                return

            extract_file = strelka.File(name='text', source=self.name)
            for c in strelka.chunk_string(text):
                self.upload_to_coordinator(extract_file.pointer, c, expire_at)

            self.files.append(extract_file)
Esempio n. 6
0
    def scan(self, data, file, options, expire_at):
        """Base64-decode the data and upload the decoded bytes.

        The first 50 decoded bytes are stored in the event as
        'decoded_header'. Data that cannot be decoded is flagged, and an
        empty decode result produces no child file.

        Args:
            data: Base64-encoded content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        with io.BytesIO(data) as encoded_file:
            encoded = encoded_file.read()

        try:
            decoded = base64.b64decode(encoded)
        except binascii.Error:
            self.flags.append('not_decodable_from_base64')
            return

        self.event['decoded_header'] = decoded[:50]
        if not decoded:
            return

        extract_file = strelka.File(source=self.name)
        for c in strelka.chunk_string(decoded):
            self.upload_to_coordinator(extract_file.pointer, c, expire_at)

        self.files.append(extract_file)
Esempio n. 7
0
    def scan(self, data, file, options, expire_at):
        """Unpack a UPX-packed executable and upload the unpacked file.

        The data is written to a temporary file and `upx -d` writes the
        unpacked result to '<tmpfile>_upx'. The result is uploaded only when
        it is larger than the input. A non-zero upx exit status is recorded
        as the flag 'return_code_<status>'.

        Args:
            data: Raw executable content.
            file: File object being scanned (not used here).
            options: Scanner options; 'tmp_directory' selects where the
                temporary files are created (default '/tmp/').
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()
            unpacked_path = f'{tmp_data.name}_upx'

            try:
                upx_return = subprocess.call(
                    ['upx', '-d', tmp_data.name, '-o', unpacked_path],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                if upx_return != 0:
                    self.flags.append(f'return_code_{upx_return}')
                    return

                with open(unpacked_path, 'rb') as upx_fin:
                    upx_file = upx_fin.read()

                if len(upx_file) > len(data):
                    extract_file = strelka.File(source=self.name)
                    for c in strelka.chunk_string(upx_file):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )
                    self.files.append(extract_file)
            finally:
                # Always remove the unpacked file, including when reading or
                # uploading it raised; previously it was left behind then.
                if os.path.exists(unpacked_path):
                    os.remove(unpacked_path)
Esempio n. 8
0
    def scan(self, data, file, options, expire_at):
        """Extract regular files from an archive read through libarchive.

        Every archive entry is counted in total.files. Regular files are
        uploaded block by block until 'limit' files have been extracted;
        later entries are still counted but not uploaded.

        Args:
            data: Raw archive content.
            file: File object being scanned (not used here).
            options: Scanner options; 'limit' caps the number of extracted
                files (default 1000).
            expire_at: Expiration value forwarded to upload_to_cache.
        """
        file_limit = options.get('limit', 1000)
        self.event['total'] = {'files': 0, 'extracted': 0}

        try:
            with libarchive.memory_reader(data) as archive:
                for entry in archive:
                    self.event['total']['files'] += 1

                    if not entry.isfile:
                        continue
                    # Keep iterating past the limit so total.files stays
                    # accurate.
                    if self.event['total']['extracted'] >= file_limit:
                        continue

                    extract_file = strelka.File(
                        name=entry.pathname,
                        source=self.name,
                    )
                    for block in entry.get_blocks():
                        self.upload_to_cache(
                            extract_file.pointer,
                            block,
                            expire_at,
                        )

                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

        except libarchive.ArchiveError:
            self.flags.append('libarchive_archive_error')
Esempio n. 9
0
    def scan(self, data, file, options, expire_at):
        """Extract the unprotected members of a RAR archive.

        Directory entries are skipped. Members that need a password are
        flagged as 'password_protected'; the others are uploaded until
        'limit' files have been extracted. The host OS of each extracted
        member is written to the event.

        Args:
            data: Raw RAR content.
            file: File object being scanned (not used here).
            options: Scanner options; 'limit' caps the number of extracted
                files (default 1000).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        file_limit = options.get('limit', 1000)
        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as rar_io, rarfile.RarFile(rar_io) as rar_obj:
            members = rar_obj.infolist()
            self.event['total']['files'] = len(members)

            for member in members:
                if member.isdir():
                    continue
                if self.event['total']['extracted'] >= file_limit:
                    break

                file_info = rar_obj.getinfo(member)
                if file_info.needs_password():
                    self.flags.append('password_protected')
                    continue

                self.event['host_os'] = HOST_OS_MAPPING[file_info.host_os]

                extract_file = strelka.File(
                    name=f'{file_info.filename}',
                    source=self.name,
                )
                for c in strelka.chunk_string(rar_obj.read(member)):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1
Esempio n. 10
0
    def scan(self, data, file, options, expire_at):
        """Decompress LZMA data and upload the decompressed content.

        The decompressed size is written to the event. A stream that ends
        early is flagged 'eof_error'; data the decoder rejects is flagged
        'lzma_error'.

        Args:
            data: LZMA-compressed content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        try:
            with io.BytesIO(data) as lzma_io, \
                    lzma.LZMAFile(filename=lzma_io) as lzma_obj:
                try:
                    payload = lzma_obj.read()
                    self.event['decompressed_size'] = len(payload)

                    extract_file = strelka.File(source=self.name)
                    for c in strelka.chunk_string(payload):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)

                except EOFError:
                    self.flags.append('eof_error')

        except lzma.LZMAError:
            self.flags.append('lzma_error')
Esempio n. 11
0
    def scan(self, data, file, options, expire_at):
        """Extract the signer certificates from a PKCS7 file.

        The data is written to a temporary file and loaded with the DER
        loader when it starts with b'0', otherwise with the default loader.
        Each signer certificate is uploaded in DER form under the name
        'sn_<serial number>'.

        Args:
            data: Raw PKCS7 content.
            file: File object being scanned (not used here).
            options: Scanner options; 'tmp_directory' selects where the
                temporary file is created (default '/tmp/').
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        tmp_directory = options.get('tmp_directory', '/tmp/')
        self.event['total'] = {'certificates': 0, 'extracted': 0}

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            is_der = data[:1] == b'0'
            if is_der:
                pkcs7 = SMIME.load_pkcs7_der(tmp_data.name)
            else:
                pkcs7 = SMIME.load_pkcs7(tmp_data.name)

            certs = pkcs7.get0_signers(X509.X509_Stack())
            if not certs:
                return

            self.event['total']['certificates'] = len(certs)
            for cert in certs:
                extract_file = strelka.File(
                    name=f'sn_{cert.get_serial_number()}',
                    source=self.name,
                )
                for c in strelka.chunk_string(cert.as_der()):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1
Esempio n. 12
0
    def scan(self, data, file, options, expire_at):
        """Decompress a compressed SWF file and upload it as a plain 'FWS' file.

        The three-byte signature selects the handling: 'CWS' bodies are
        inflated with zlib, 'ZWS' bodies with pylzma, and 'FWS' files are only
        recorded. The rebuilt file is the 'FWS' signature, the next five
        header bytes, and the decompressed body cut to the declared length
        minus the eight header bytes.

        Args:
            data: Raw SWF content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_cache.
        """
        with io.BytesIO(data) as swf_io:
            swf_io.seek(4)
            # Read the length field as unsigned: as a signed value, a length
            # of 2**31 or more became negative and the slices below then
            # dropped data from the end instead of capping it.
            swf_size = struct.unpack('<I', swf_io.read(4))[0]
            swf_io.seek(0)
            magic = swf_io.read(3)
            extract_data = b'FWS' + swf_io.read(5)

            if magic == b'CWS':
                self.event['type'] = 'CWS'
                try:
                    extract_data += zlib.decompress(swf_io.read())[:swf_size - 8]
                    extract_file = strelka.File(
                        source=self.name,
                    )

                    for c in strelka.chunk_string(extract_data):
                        self.upload_to_cache(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)

                except zlib.error:
                    self.flags.append('zlib_error')

            elif magic == b'ZWS':
                self.event['type'] = 'ZWS'
                # The compressed body is read from offset 12, past the four
                # bytes that follow the 8-byte header.
                swf_io.seek(12)
                extract_data += pylzma.decompress(swf_io.read())[:swf_size - 8]
                extract_file = strelka.File(
                    source=self.name,
                )

                for c in strelka.chunk_string(extract_data):
                    self.upload_to_cache(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)

            elif magic == b'FWS':
                self.event['type'] = 'FWS'
Esempio n. 13
0
    def scan(self, data, file, options, expire_at):
        """Extract the unencrypted members of a ZIP archive.

        Directory entries and empty members are skipped. Encrypted members
        are not read; the archive is flagged 'encrypted' once, whichever
        member is the first encrypted one. Extraction stops after 'limit'
        files.

        Args:
            data: Raw ZIP content.
            file: File object being scanned (not used here).
            options: Scanner options; 'limit' caps the number of extracted
                files (default 1000).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as zip_io:
            try:
                with zipfile.ZipFile(zip_io) as zip_obj:
                    name_list = zip_obj.namelist()
                    self.event['total']['files'] = len(name_list)

                    for name in name_list:
                        # Directory entries carry no data.
                        if name.endswith('/'):
                            continue
                        if self.event['total']['extracted'] >= file_limit:
                            break

                        try:
                            zinfo = zip_obj.getinfo(name)

                            # Bit 0 of the general-purpose flags marks an
                            # encrypted member. Flag it once, no matter where
                            # the member sits; checking only entry 0 missed
                            # archives whose first entry is a directory or an
                            # unencrypted file.
                            if zinfo.flag_bits & 0x1:
                                if 'encrypted' not in self.flags:
                                    self.flags.append('encrypted')
                                continue

                            extract_data = zip_obj.read(name)
                            if not extract_data:
                                continue

                            extract_file = strelka.File(
                                name=name,
                                source=self.name,
                            )

                            for c in strelka.chunk_string(extract_data):
                                self.upload_to_coordinator(
                                    extract_file.pointer,
                                    c,
                                    expire_at,
                                )

                            self.files.append(extract_file)
                            self.event['total']['extracted'] += 1

                        except NotImplementedError:
                            self.flags.append('unsupported_compression')
                        except RuntimeError:
                            self.flags.append('runtime_error')
                        except ValueError:
                            self.flags.append('value_error')
                        except zlib.error:
                            self.flags.append('zlib_error')

            except zipfile.BadZipFile:
                self.flags.append('bad_zip')
Esempio n. 14
0
    def scan(self, data, file, options, expire_at):
        """Extract VBA macros from a document and optionally analyze them.

        Every extracted macro is uploaded as a child file named after its VBA
        filename. When 'analyze_macros' is enabled, the analyzer's keywords
        are sorted into event lists by result type.

        Args:
            data: Raw document content.
            file: File object being scanned; its name is passed to the parser.
            options: Scanner options; 'analyze_macros' toggles the keyword
                analysis (default True).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        analyze_macros = options.get('analyze_macros', True)

        self.event['total'] = {'files': 0, 'extracted': 0}

        # Analyzer result type -> event key that collects its keywords.
        event_keys = {
            'AutoExec': 'auto_exec',
            'Base64 String': 'base64',
            'Dridex String': 'dridex',
            'Hex String': 'hex',
            'IOC': 'ioc',
            'Suspicious': 'suspicious',
        }

        # Bound before the try block so the finally clause never touches an
        # unassigned name when the parser constructor raises.
        vba = None
        try:
            vba = olevba3.VBA_Parser(filename=file.name, data=data)
            if vba.detect_vba_macros():
                extract_macros = list(vba.extract_macros())
                self.event['total']['files'] = len(extract_macros)
                for (filename, stream_path, vba_filename,
                     vba_code) in extract_macros:
                    extract_file = strelka.File(
                        name=f'{vba_filename}',
                        source=self.name,
                    )

                    for c in strelka.chunk_string(vba_code):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

                if analyze_macros:
                    for event_key in event_keys.values():
                        self.event.setdefault(event_key, [])

                    for (macro_type, keyword, description) in vba.analyze_macros():
                        event_key = event_keys.get(macro_type)
                        if event_key is not None:
                            self.event[event_key].append(keyword)

        except olevba3.FileOpenError:
            self.flags.append('file_open_error')
        finally:
            if vba is not None:
                vba.close()
Esempio n. 15
0
    def scan(self, data, file, options, expire_at):
        """Try a list of passwords on an encrypted Office document.

        Passwords are loaded once from 'password_file' into self.passwords.
        When the document is encrypted, each password is tried in turn. The
        first one that decrypts the document is written to the event and the
        decrypted content is uploaded as a child file.

        Args:
            data: Raw document content.
            file: File object being scanned (not used here).
            options: Scanner options; 'password_file' is the path of the
                password list (default '/etc/strelka/passwords.dat').
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        password_file = options.get('password_file', '/etc/strelka/passwords.dat')

        if not self.passwords:
            if os.path.isfile(password_file):
                with open(password_file, 'rb') as f:
                    for line in f:
                        self.passwords.append(line.strip())

        with io.BytesIO(data) as doc_io:

            msoff_doc = msoffcrypto.OfficeFile(doc_io)
            password = ''
            extract_data = b''

            if msoff_doc.is_encrypted():
                self.flags.append('password_protected')

                for pw in self.passwords:
                    try:
                        candidate = pw.decode('utf-8')
                        # A fresh buffer per attempt, so bytes written by a
                        # failed attempt can never end up in the output of a
                        # later, successful one.
                        output_doc = io.BytesIO()
                        msoff_doc.load_key(password=candidate)
                        msoff_doc.decrypt(output_doc)
                    except Exception:
                        # Deliberate best effort: any failure is treated as
                        # "this password did not work" and the next one is
                        # tried.
                        continue

                    extract_data = output_doc.getvalue()
                    password = candidate
                    break

            if password:
                self.event['password'] = password

                extract_file = strelka.File(
                    source=self.name,
                )

                for c in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
            else:
                self.flags.append('no_password_match_found')
Esempio n. 16
0
    def scan(self, data, file, options, expire_at):
        """Base64-decode the data and upload the result as a child file.

        Args:
            data: Base64-encoded content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        payload = base64.b64decode(data)
        extract_file = strelka.File(source=self.name)

        for chunk in strelka.chunk_string(payload):
            self.upload_to_coordinator(extract_file.pointer, chunk, expire_at)

        self.files.append(extract_file)
Esempio n. 17
0
    def scan(self, data, file, options, expire_at):
        """Parse an email message, record its headers and extract its parts.

        Headers are written to the event, restricted to the names listed in
        the 'headers' option when that list is not empty. Every part that
        has a decodable payload is uploaded under its filename, or as
        'part_<index>' when it has none, and tagged with its content type as
        an external flavor.

        Args:
            data: Raw message content; decoded as UTF-8 with replacement.
            file: File object being scanned (not used here).
            options: Scanner options; 'headers' filters the recorded headers.
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        headers = options.get('headers', [])
        self.event['total'] = {'parts': 0, 'extracted': 0}

        try:
            text = data.decode('UTF-8', 'replace')
            message = email.message_from_string(text)

            self.event['headers'] = []
            self.event['headers'].extend(
                {'header': h, 'value': v}
                for h, v in message.items()
                if not headers or h in headers
            )

            self.event['parts'] = []
            for index, part in enumerate(message.walk()):
                self.event['total']['parts'] += 1

                extract_data = part.get_payload(decode=True)
                if extract_data is None:
                    continue

                part_filename = part.get_filename()
                if part_filename is None:
                    extract_name = f'part_{index}'
                else:
                    extract_name = f'{part_filename}'
                    self.event['parts'].append(part_filename)

                extract_file = strelka.File(
                    name=extract_name,
                    source=self.name,
                )
                extract_file.add_flavors(
                    {'external': [part.get_content_type()]})

                for c in strelka.chunk_string(extract_data):
                    self.upload_to_coordinator(
                        extract_file.pointer,
                        c,
                        expire_at,
                    )

                self.files.append(extract_file)
                self.event['total']['extracted'] += 1

        except AssertionError:
            self.flags.append('assertion_error')
Esempio n. 18
0
    def _recurse_node(self, node, xml_args):
        """Recursively parses XML file.

        The XML file is recursively parsed down every node tree. For each
        node whose tag is string-like, the tag and namespace are recorded in
        the event. The node text (or its 'name' attribute, when present) is
        then stored as tag data or uploaded as a child file, depending on
        which list in xml_args contains the tag.

        Args:
            node: node to be recursively parsed.
            xml_args: options set by the scanner that affect XML parsing;
                the 'metadata_tags' and 'extract_tags' entries are read.
        """
        if node is None:
            return

        # Only nodes whose tag supports indexing are treated as elements.
        if hasattr(node.tag, '__getitem__'):
            if node.tag.startswith('{'):
                # Namespaced tags have the form '{namespace}tag'.
                namespace, separator, tag = node.tag[1:].partition('}')
            else:
                namespace = None
                tag = node.tag

            self.event['total']['tags'] += 1
            if namespace not in self.event['namespaces']:
                self.event['namespaces'].append(namespace)
            if tag not in self.event['tags']:
                self.event['tags'].append(tag)

            # The 'name' attribute takes precedence over the node text.
            text = node.attrib.get('name', node.text)
            if text is not None:
                if tag in xml_args['metadata_tags']:
                    tag_data = {'tag': tag, 'text': text.strip()}
                    if tag_data not in self.event['tag_data']:
                        self.event['tag_data'].append(tag_data)
                elif tag in xml_args['extract_tags']:
                    extract_file = strelka.File(
                        name=tag,
                        source=self.name,
                    )

                    for c in strelka.chunk_string(text):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            self.expire_at,
                        )

                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

        # Iterating an element yields its children; getchildren() is gone
        # from xml.etree in Python 3.9. The bound call must not pass `self`
        # again -- the extra argument raised TypeError on the first child.
        for child in node:
            self._recurse_node(child, xml_args)

        return
Esempio n. 19
0
    def scan(self, data, file, options, expire_at):
        """Decompress gzip data and upload the decompressed content.

        The decompressed size is written to the event as 'size'.

        Args:
            data: Gzip-compressed content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        with io.BytesIO(data) as gzip_io, \
                gzip.GzipFile(fileobj=gzip_io) as gzip_obj:
            payload = gzip_obj.read()
            self.event['size'] = len(payload)

            extract_file = strelka.File(source=self.name)
            for chunk in strelka.chunk_string(payload):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )

            self.files.append(extract_file)
Esempio n. 20
0
 def scan(self, data, file, options, expire_at):
     """Flag images whose least-significant bits decode to ASCII text.

     The image is decoded with OpenCV, its bits and bytes are obtained
     through the scanner's helper methods, and the event key 'lsb' is set to
     True when the recovered text holds more than one ASCII character,
     otherwise to False.

     Args:
         data: Raw image content.
         file: File object being scanned (not used here).
         options: Scanner options (not used here).
         expire_at: Expiration value (not used here; nothing is uploaded).
     """
     # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
     image = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
     bits = self._get_bits(image)
     bytes_ = self._get_bytes(bits)
     text = self._convert_bytes_to_text(bytes_)
     # Non-ASCII characters are dropped before the length check.
     flag = text.encode('ascii', 'ignore')
     self.event['lsb'] = len(flag) > 1
Esempio n. 21
0
    def scan(self, data, file, options, expire_at):
        """Inflate zlib data and upload the decompressed content.

        The decompressed size is written to the event as 'size'.

        Args:
            data: Zlib-compressed content.
            file: File object being scanned (not used here).
            options: Scanner options (not used here).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """
        payload = zlib.decompress(data)
        self.event["size"] = len(payload)

        extract_file = strelka.File(source=self.name)
        for chunk in strelka.chunk_string(payload):
            self.upload_to_coordinator(extract_file.pointer, chunk, expire_at)

        self.files.append(extract_file)
Esempio n. 22
0
    def scan(self, data, file, options, expire_at):
        """Recover the password of an Office document and upload the decrypted file.

        The password comes from crack_word(). When one is returned, the
        document is decrypted with it and the result is uploaded as a child
        file; a decryption failure is recorded as a flag.

        Args:
            data: Raw document content.
            file: File object being scanned (not used in the visible code).
            options: Scanner options (jtr_path, tmp_file_directory,
                password_file, log_pws, scanner_timeout, brute_force,
                max_length).
            expire_at: Expiration value forwarded to upload_to_coordinator.
        """

        jtr_path = options.get('jtr_path', '/jtr/')
        tmp_directory = options.get('tmp_file_directory', '/tmp/')
        password_file = options.get('password_file',
                                    '/etc/strelka/passwords.dat')
        log_extracted_pws = options.get('log_pws', False)
        scanner_timeout = options.get('scanner_timeout', 150)
        brute = options.get('brute_force', False)
        max_length = options.get('max_length', 5)

        with io.BytesIO(data) as doc_io:

            msoff_doc = msoffcrypto.OfficeFile(doc_io)
            output_doc = io.BytesIO()
            # crack_word() is defined outside this excerpt; a falsy result is
            # handled by the else branch below.
            if extracted_pw := crack_word(self,
                                          data,
                                          jtr_path,
                                          tmp_directory,
                                          brute=brute,
                                          scanner_timeout=scanner_timeout,
                                          max_length=max_length,
                                          password_file=password_file):
                # The recovered password is written to the event only when
                # 'log_pws' is enabled.
                if log_extracted_pws:
                    self.event['cracked_password'] = extracted_pw
                try:
                    # extracted_pw is decoded as UTF-8 before use, so it is
                    # presumably bytes -- confirm against crack_word().
                    msoff_doc.load_key(password=extracted_pw.decode('utf-8'))
                    msoff_doc.decrypt(output_doc)
                    # Rewind so the decrypted content can be read back.
                    output_doc.seek(0)
                    extract_data = output_doc.read()
                    output_doc.seek(0)
                    extract_file = strelka.File(source=self.name, )

                    for c in strelka.chunk_string(extract_data):
                        self.upload_to_coordinator(
                            extract_file.pointer,
                            c,
                            expire_at,
                        )

                    self.files.append(extract_file)
                # NOTE(review): this bare except also reports upload failures
                # and interpreter-level exceptions as a decryption failure.
                except:
                    self.flags.append(
                        'Could not decrypt document with recovered password')

            else:
                # NOTE(review): the body of this branch is not visible in this excerpt.
Esempio n. 23
0
    def scan(self, data, file, options, expire_at):
        """Record the core properties of a DOCX document in the event.

        Every core property is copied to the event under a key of the same
        name. The date properties (``created``, ``last_printed``,
        ``modified``) are stored via ``isoformat()`` and only when they are
        not ``None``.

        Options:
            extract_text: when truthy, the text of every paragraph is
                uploaded to a child file named ``text`` (default ``False``).
        """
        want_text = options.get('extract_text', False)

        # (property name, needs isoformat()) in the order the keys are
        # written to the event.
        property_table = (
            ('author', False),
            ('category', False),
            ('comments', False),
            ('content_status', False),
            ('created', True),
            ('identifier', False),
            ('keywords', False),
            ('language', False),
            ('last_modified_by', False),
            ('last_printed', True),
            ('modified', True),
            ('revision', False),
            ('subject', False),
            ('title', False),
            ('version', False),
        )

        with io.BytesIO(data) as stream:
            document = docx.Document(stream)

            for prop_name, is_timestamp in property_table:
                prop_value = getattr(document.core_properties, prop_name)
                if not is_timestamp:
                    self.event[prop_name] = prop_value
                elif prop_value is not None:
                    self.event[prop_name] = prop_value.isoformat()

            if want_text:
                text_file = strelka.File(name='text', source=self.name)
                for para in document.paragraphs:
                    self.upload_to_cache(text_file.pointer, para.text, expire_at)
                self.files.append(text_file)
Esempio n. 24
0
    def scan(self, data, file, options, expire_at):
        """Parse an e-mail message, log its headers and extract its parts.

        Headers are stored (de-duplicated) in ``event['headers']`` as
        ``{'header': ..., 'value': ...}`` entries. Every walked part is
        counted in ``event['total']['parts']``; parts with a decodable
        payload are emitted as child files, named after the part's filename
        when it has one and ``part_<index>`` otherwise. An ``AssertionError``
        raised while processing is reported through the ``assertion_error``
        flag.
        """
        self.event['total'] = {'parts': 0, 'extracted': 0}

        try:
            text = data.decode('UTF-8', 'replace')
            message = email.message_from_string(text)

            self.event.setdefault('headers', [])
            for key, value in message.items():
                entry = {
                    'header': key,
                    'value': strelka.normalize_whitespace(value.strip()),
                }
                if entry in self.event['headers']:
                    continue
                self.event['headers'].append(entry)

            self.event.setdefault('parts', [])
            for index, part in enumerate(message.walk()):
                self.event['total']['parts'] += 1

                payload = part.get_payload(decode=True)
                if payload is None:
                    # Nothing to extract from this part.
                    continue

                filename = part.get_filename()
                if filename is None:
                    child_name = f'part_{index}'
                else:
                    child_name = f'{filename}'
                    self.event['parts'].append(filename)

                child = strelka.File(name=child_name, source=self.name)
                # Pass the declared content type along as an external flavor.
                child.add_flavors({'external': [part.get_content_type()]})

                for piece in strelka.chunk_string(payload):
                    self.upload_to_coordinator(child.pointer, piece, expire_at)

                self.files.append(child)
                self.event['total']['extracted'] += 1

        except AssertionError:
            self.flags.append('assertion_error')
Esempio n. 25
0
    def scan(self, data, file, options, expire_at):
        """Extract any bytes stored after the declared end of a BMP file.

        Bytes 2-5 of ``data`` are read as a little-endian integer and taken
        as the expected file size. Everything beyond that offset is treated
        as a trailer: its offset is logged in ``event['trailer_index']``, its
        bytes in ``event['BMP_EOF']``, and it is uploaded as a child file.

        When there are no bytes past the declared size -- the sizes match, or
        the data is shorter than declared -- the ``no_trailer`` flag is set
        and nothing is extracted. (Previously a too-short file produced an
        empty child file.)
        """
        expected_size = int.from_bytes(data[2:6], "little")
        trailer_bytes_data = data[expected_size:]

        # An empty slice means there is nothing after the declared end.
        if not trailer_bytes_data:
            self.flags.append('no_trailer')
            return

        self.event['trailer_index'] = expected_size
        extract_file = strelka.File(
            source=self.name,
        )

        for c in strelka.chunk_string(trailer_bytes_data):
            self.upload_to_coordinator(
                extract_file.pointer,
                c,
                expire_at,
            )

        self.event['BMP_EOF'] = trailer_bytes_data
        self.files.append(extract_file)
Esempio n. 26
0
    def scan(self, data, file, options, expire_at):
        """Extract any bytes that follow the last ``FF D9`` marker.

        Nothing happens when the data already ends with the marker. When the
        marker is absent altogether the ``no_trailer`` flag is set. Otherwise
        the marker's offset is logged in ``event['trailer_index']`` and the
        bytes after it are uploaded as a child file.
        """
        end_marker = b'\xff\xd9'
        if data.endswith(end_marker):
            return

        marker_at = data.rfind(end_marker)
        if marker_at == -1:
            self.flags.append('no_trailer')
            return

        # Skip past the two marker bytes to reach the trailing data.
        remainder = data[marker_at + 2:]
        if not remainder:
            return

        self.event['trailer_index'] = marker_at

        child = strelka.File(source=self.name)
        for piece in strelka.chunk_string(remainder):
            self.upload_to_coordinator(child.pointer, piece, expire_at)

        self.files.append(child)
Esempio n. 27
0
    def scan(self, data, file, options, expire_at):
        """Extract any bytes stored after the final ``42 60 82`` byte sequence.

        * Data ending with the sequence has nothing after it: the
          ``no_trailer`` flag is set.
        * Data that does not contain the sequence at all is reported with
          ``event['end_index'] = -1``.
        * Otherwise the offset just past the last occurrence is logged in
          ``event['trailer_index']``, the trailing bytes are logged in
          ``event['PNG_EOF']`` and uploaded as a child file.

        Fixes over the previous version: the end-of-file test compared
        ``data[i]`` (an ``int`` in Python 3) with ``bytes`` objects and so
        could never match; empty input raised ``IndexError``; and the child
        file was registered without its data ever being uploaded.
        """
        end_marker = b'\x42\x60\x82'

        if data.endswith(end_marker):
            # The file stops right at the end marker: no trailing data.
            self.flags.append('no_trailer')
            return

        marker_index = data.rfind(end_marker)
        if marker_index == -1:
            # Did not find the official ending of the file.
            self.event['end_index'] = -1
            return

        trailer_index = marker_index + len(end_marker)
        trailer_data = data[trailer_index:]
        self.event['trailer_index'] = trailer_index

        extract_file = strelka.File(source=self.name)
        for c in strelka.chunk_string(trailer_data):
            self.upload_to_coordinator(
                extract_file.pointer,
                c,
                expire_at,
            )

        self.event['PNG_EOF'] = trailer_data
        self.files.append(extract_file)
Esempio n. 28
0
    def scan(self, data, file, options, expire_at):
        """Decompress bzip2 data and emit the result as a child file.

        The decompressed length is logged in ``event['size']``. An
        ``EOFError`` or ``OSError`` raised while reading or uploading is
        reported through the ``eof_error`` / ``os_error`` flag respectively.
        """
        with io.BytesIO(data) as raw, bz2.BZ2File(filename=raw) as archive:
            try:
                plain = archive.read()
                self.event['size'] = len(plain)

                child = strelka.File(source=self.name)
                for piece in strelka.chunk_string(plain):
                    self.upload_to_coordinator(child.pointer, piece, expire_at)

                self.files.append(child)

            except (EOFError, OSError) as err:
                if isinstance(err, EOFError):
                    self.flags.append('eof_error')
                else:
                    self.flags.append('os_error')
Esempio n. 29
0
    def scan(self, data, file, options, expire_at):
        """Extract the regular files contained in a tar archive.

        ``event['total']['files']`` receives the number of archive members
        and ``event['total']['extracted']`` the number of child files
        emitted. Only regular-file members are extracted.

        Options:
            limit: maximum number of files to extract (default 1000).

        Flags:
            key_error: a ``KeyError`` was raised while extracting a member.
            tarfile_read_error: the archive could not be read.

        Fix over the previous version: ``tar_member.isfile`` was referenced
        without being called, so the check was always true and non-file
        members (for example links) were passed to ``extractfile`` too.
        """
        file_limit = options.get('limit', 1000)

        self.event['total'] = {'files': 0, 'extracted': 0}

        with io.BytesIO(data) as tar_io:
            try:
                with tarfile.open(fileobj=tar_io) as tar_obj:
                    tar_members = tar_obj.getmembers()
                    self.event['total']['files'] = len(tar_members)
                    for tar_member in tar_members:
                        if not tar_member.isfile():
                            continue
                        if self.event['total']['extracted'] >= file_limit:
                            break

                        try:
                            tar_file = tar_obj.extractfile(tar_member)
                            if tar_file is not None:
                                extract_file = strelka.File(
                                    name=tar_member.name,
                                    source=self.name,
                                )

                                for c in strelka.chunk_string(
                                        tar_file.read()):
                                    self.upload_to_cache(
                                        extract_file.pointer,
                                        c,
                                        expire_at,
                                    )

                                self.files.append(extract_file)
                                self.event['total']['extracted'] += 1

                        except KeyError:
                            self.flags.append('key_error')

            except tarfile.ReadError:
                self.flags.append('tarfile_read_error')
Esempio n. 30
0
    def scan(self, data, file, options, expire_at):
        """Run ``tesseract`` on the data and log the recognised text.

        The data is written to a temporary file and ``tesseract`` is invoked
        with a second temporary file name as its output base; the text is
        then read from ``<output base>.txt``. When text is produced, its
        whitespace-separated tokens are stored in ``event['text']``.

        Options:
            extract_text: when truthy, the raw OCR output is also uploaded as
                a child file named ``text`` (default ``False``).
            tmp_directory: directory for the temporary files
                (default ``'/tmp/'``).

        Flags:
            return_code_<n>: ``tesseract`` exited with non-zero status ``n``.

        Fixes over the previous version: the flag was a plain string, so the
        literal text ``{tess_return}`` was reported instead of the exit
        status; removing the ``.txt`` file raised ``FileNotFoundError`` when
        tesseract failed before creating it; and the ``.txt`` file was left
        behind if an exception occurred while processing.
        """
        extract_text = options.get('extract_text', False)
        tmp_directory = options.get('tmp_directory', '/tmp/')

        with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
            tmp_data.write(data)
            tmp_data.flush()

            with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_tess:
                tess_txt_name = f'{tmp_tess.name}.txt'
                try:
                    tess_return = subprocess.call(
                        ['tesseract', tmp_data.name, tmp_tess.name],
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL
                    )
                    if tess_return == 0:
                        with open(tess_txt_name, 'rb') as tess_txt:
                            ocr_file = tess_txt.read()

                        if ocr_file:
                            self.event['text'] = ocr_file.split()
                            if extract_text:
                                extract_file = strelka.File(
                                    name='text',
                                    source=self.name,
                                )

                                for c in strelka.chunk_string(ocr_file):
                                    self.upload_to_coordinator(
                                        extract_file.pointer,
                                        c,
                                        expire_at,
                                    )

                                self.files.append(extract_file)

                    else:
                        self.flags.append(f'return_code_{tess_return}')
                finally:
                    # The output file may not exist if tesseract failed.
                    try:
                        os.remove(tess_txt_name)
                    except FileNotFoundError:
                        pass