Beispiel #1
0
    def close_destination(self, destination):
        """Called when a destination group is closed; if it is an
        ``\\objdata`` destination, decode its hex payload into a new
        RtfObject appended to ``self.objects``, and attempt to parse it
        as an OLE 1.0 object (filling in OLE/package metadata on success).

        :param destination: destination state object with ``cword``,
            ``start``, ``end`` and accumulated ``data`` (bytes).
        """
        if destination.cword == b'objdata':
            log.debug('*** Close object data at index %Xh' % self.index)
            rtfobj = RtfObject()
            self.objects.append(rtfobj)
            rtfobj.start = destination.start
            rtfobj.end = destination.end
            # Filter out all whitespaces first (just ignored):
            hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
            # Then filter out any other non-hex character:
            hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
            if len(hexdata) < len(hexdata1):
                # this is only for debugging:
                nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
                log.debug('Found non-hex chars in hexdata: %r' % nonhex)
            # MS Word accepts an extra hex digit, so we need to trim it if present:
            if len(hexdata) & 1:
                log.debug('Odd length, trimmed last byte.')
                hexdata = hexdata[:-1]
            rtfobj.hexdata = hexdata
            object_data = binascii.unhexlify(hexdata)
            rtfobj.rawdata = object_data
            rtfobj.rawdata_md5 = hashlib.md5(object_data).hexdigest()
            # TODO: check if all hex data is extracted properly

            obj = oleobj.OleObject()
            try:
                obj.parse(object_data)
                rtfobj.format_id = obj.format_id
                rtfobj.class_name = obj.class_name
                rtfobj.oledata_size = obj.data_size
                rtfobj.oledata = obj.data
                rtfobj.oledata_md5 = hashlib.md5(obj.data).hexdigest()
                rtfobj.is_ole = True
                if obj.class_name.lower() == b'package':
                    # OLE Package: extract the embedded file's metadata/content
                    opkg = oleobj.OleNativeStream(bindata=obj.data,
                                                  package=True)
                    rtfobj.filename = opkg.filename
                    rtfobj.src_path = opkg.src_path
                    rtfobj.temp_path = opkg.temp_path
                    rtfobj.olepkgdata = opkg.data
                    rtfobj.olepkgdata_md5 = hashlib.md5(opkg.data).hexdigest()
                    rtfobj.is_package = True
                else:
                    if olefile.isOleFile(obj.data):
                        ole = olefile.OleFileIO(obj.data)
                        rtfobj.clsid = ole.root.clsid
                        rtfobj.clsid_desc = clsid.KNOWN_CLSIDS.get(
                            rtfobj.clsid,
                            'unknown CLSID (please report at https://github.com/decalage2/oletools/issues)'
                        )
            # Catch only Exception (a bare "except:" would also swallow
            # KeyboardInterrupt/SystemExit); malformed object data is
            # expected, so we just record it at debug level.
            except Exception:
                log.debug('*** Not an OLE 1.0 Object', exc_info=True)
Beispiel #2
0
    def run(self, task, param=None):
        """Scan an RTF file: YARA-scan its raw (and keycode-stripped)
        content, dispatch a format-specific handler, then enumerate all
        embedded objects, collecting per-object metadata, strings, YARA
        and regex-IOC matches into ``task_data`` before calling
        ``self.end(task, task_data)``.

        :param task: task object providing ``scanner`` (yara/regex) and
            ``update_ioc``.
        :param param: unused, kept for interface compatibility.
        """
        task_data = collections.OrderedDict()
        output = []
        file_buffer_stripped = ""
        task_data["file_sig"] = ""

        with open(self.file, 'rb') as file_content:
            file_buffer = file_content.read()

        task_data["file_sig"] = task.scanner["yara"].scan_buffer(file_buffer)

        # Decode once up front; keep None on failure so later steps can
        # fall back to a lenient decode instead of crashing (the original
        # guarded only the first decode and crashed on the later ones).
        file_text = None
        try:
            file_text = file_buffer.decode("utf8")
        except UnicodeDecodeError:
            logger.warning(
                f"UnicodeDecodeError: Failed to decode -> {self.file}")

        # Scan stripped file content if no match yet and decoding worked
        if not task_data["file_sig"] and file_text is not None:
            file_buffer_stripped = self._strip_keycodes(file_text)
            task_data["file_sig"] = task.scanner["yara"].scan_buffer(
                file_buffer_stripped)

        # Get the right handler by file_sig
        handler = self._get_handler(task_data["file_sig"])
        if handler:
            handler(self, file_buffer_stripped or file_buffer, task_data)

        # Get info about all objects available
        _objects = list(rtfobj.rtf_iter_objects(self.file))
        task_data["obj_count"] = len(_objects)

        # Lenient re-decode for body scanning so a non-UTF8 file no
        # longer aborts the whole run.
        safe_text = (file_text if file_text is not None
                     else file_buffer.decode("utf8", errors="replace"))
        task_data["body_ioc_strings"] = task.scanner["regex"].ioc_scan(
            safe_text)
        # Update Task's IOCs
        task.update_ioc(task_data["body_ioc_strings"])

        task_data["body_text"] = self._striprtf(safe_text)

        # Hoisted out of the loop: printable-ASCII runs of 3+ chars
        printable_re = re.compile(r"[^\x00-\x1F\x7F-\xFF]{3,}")

        if _objects:
            for offset, orig_len, data in _objects:
                # Fresh dict per object (original reused one dict with
                # copy()/clear(); a new dict is equivalent and safer).
                _object = collections.OrderedDict()
                _object["obj_offset"] = '0x%08X' % offset
                try:
                    _oleobj = oleobj.OleObject()
                    _oleobj.parse(data)
                    _object["ole_type"] = str(_oleobj.class_name)
                    _object["ole_size"] = _oleobj.data_size
                except Exception:
                    # Not a parseable OLE object: leave fields empty
                    _object["ole_type"] = ""
                    _object["ole_size"] = ""

                _object["obj_size"] = len(data)
                _object["obj_sig"] = str(data[:self.obj_sig_len])

                try:
                    data_str = data.decode(errors='ignore')
                    unique_strings = "\r".join(
                        set(printable_re.findall(data_str)))
                except Exception:
                    unique_strings = ""

                _object["ole_strings"] = unique_strings
                # Scan the data object content
                _object["ole_yara_sig"] = task.scanner["yara"].scan_buffer(
                    data)
                _object["ole_regex_strings"] = task.scanner["regex"].ioc_scan(
                    unique_strings)
                # Update Task's IOCs
                task.update_ioc(_object["ole_regex_strings"])

                output.append(_object)
        else:
            logger.warning(f"No objects found. File: {self.file}")

        # Properly close the task before returning from the function
        task_data["objects"] = output
        self.end(task, task_data)