def close_destination(self, destination):
    """
    Handle the end of a destination group; extract the object for ``objdata``.

    Destinations whose control word is not ``objdata`` are ignored. For an
    ``objdata`` destination, a new RtfObject is appended to ``self.objects``
    and filled with the hex-decoded payload and its MD5. The payload is then
    parsed as an OLE 1.0 object on a best-effort basis: any parsing failure
    is logged at debug level and leaves the OLE-specific attributes unset.

    :param destination: the destination being closed; its ``cword``,
        ``start``, ``end`` and ``data`` attributes are read.
    :returns: None
    """
    if destination.cword != b'objdata':
        return

    log.debug('*** Close object data at index %Xh' % self.index)
    rtfobj = RtfObject()
    self.objects.append(rtfobj)
    rtfobj.start = destination.start
    rtfobj.end = destination.end

    # Filter out all whitespaces first (just ignored):
    hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
    # Then filter out any other non-hex character:
    hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
    if len(hexdata) < len(hexdata1):
        # this is only for debugging:
        nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
        log.debug('Found non-hex chars in hexdata: %r' % nonhex)
    # MS Word accepts an extra hex digit, so we need to trim it if present
    # (unhexlify requires an even number of digits):
    if len(hexdata) & 1:
        log.debug('Odd length, trimmed last byte.')
        hexdata = hexdata[:-1]
    rtfobj.hexdata = hexdata
    object_data = binascii.unhexlify(hexdata)
    rtfobj.rawdata = object_data
    rtfobj.rawdata_md5 = hashlib.md5(object_data).hexdigest()
    # TODO: check if all hex data is extracted properly

    obj = oleobj.OleObject()
    try:
        obj.parse(object_data)
        rtfobj.format_id = obj.format_id
        rtfobj.class_name = obj.class_name
        rtfobj.oledata_size = obj.data_size
        rtfobj.oledata = obj.data
        rtfobj.oledata_md5 = hashlib.md5(obj.data).hexdigest()
        rtfobj.is_ole = True
        if obj.class_name.lower() == b'package':
            opkg = oleobj.OleNativeStream(bindata=obj.data, package=True)
            rtfobj.filename = opkg.filename
            rtfobj.src_path = opkg.src_path
            rtfobj.temp_path = opkg.temp_path
            rtfobj.olepkgdata = opkg.data
            rtfobj.olepkgdata_md5 = hashlib.md5(opkg.data).hexdigest()
            rtfobj.is_package = True
        elif olefile.isOleFile(obj.data):
            ole = olefile.OleFileIO(obj.data)
            try:
                rtfobj.clsid = ole.root.clsid
            finally:
                # Release the underlying stream even if reading the root
                # entry fails.
                ole.close()
            rtfobj.clsid_desc = clsid.KNOWN_CLSIDS.get(
                rtfobj.clsid,
                'unknown CLSID (please report at https://github.com/decalage2/oletools/issues)'
            )
    except Exception:
        # Best effort: malformed or non-OLE payloads are expected here.
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # still propagate.
        log.debug('*** Not an OLE 1.0 Object')
def run(self, task, param=None):
    """
    Scan the file at ``self.file`` and report the results through ``self.end``.

    Steps, in order:

    1. Read the file and scan the raw bytes with ``task.scanner["yara"]``.
    2. If nothing matched and the content is valid UTF-8, scan again after
       ``self._strip_keycodes``.
    3. Call the handler returned by ``self._get_handler`` (if any) with the
       stripped text when available, otherwise with the raw bytes.
    4. Enumerate embedded objects with ``rtfobj.rtf_iter_objects`` and, for
       each one, collect offset, size, OLE class/size, a leading signature,
       printable strings, YARA matches and regex IOC matches.
    5. Store everything in ``task_data`` and call ``self.end(task, task_data)``.

    Content that is not valid UTF-8 is decoded leniently (undecodable bytes
    dropped) for the body IOC scan and text extraction, so such files are
    still reported instead of raising ``UnicodeDecodeError``.

    :param task: task object; ``task.scanner["yara"]``,
        ``task.scanner["regex"]`` and ``task.update_ioc`` are used.
    :param param: unused by this method.
    :returns: None
    """
    task_data = collections.OrderedDict()
    output = []
    file_buffer_stripped = ""

    # Scan the raw file content
    with open(self.file, 'rb') as file_content:
        file_buffer = file_content.read()
    task_data["file_sig"] = task.scanner["yara"].scan_buffer(file_buffer)

    # Decode once; every text-based step below shares the result.
    try:
        file_text = file_buffer.decode("utf8")
        strictly_decoded = True
    except UnicodeDecodeError:
        logger.warning(
            f"UnicodeDecodeError: Failed to decode -> {self.file}")
        file_text = file_buffer.decode("utf8", errors="ignore")
        strictly_decoded = False

    # Scan stripped file content if previous match was not found. This is
    # skipped when strict decoding failed, as before.
    if not task_data["file_sig"] and strictly_decoded:
        file_buffer_stripped = self._strip_keycodes(file_text)
        task_data["file_sig"] = task.scanner["yara"].scan_buffer(
            file_buffer_stripped)

    # Get the right handler by file_sig
    handler = self._get_handler(task_data["file_sig"])
    if handler:
        # The handler is called as a plain function, hence the explicit self.
        handler(self, file_buffer_stripped or file_buffer, task_data)

    # Get info about all objects available
    _objects = list(rtfobj.rtf_iter_objects(self.file))
    task_data["obj_count"] = len(_objects)
    task_data["body_ioc_strings"] = task.scanner["regex"].ioc_scan(file_text)

    # Update Task's IOCs
    task.update_ioc(task_data["body_ioc_strings"])
    task_data["body_text"] = self._striprtf(file_text)

    if not _objects:
        logger.warning(f"No objects found. File: {self.file}")

    # Runs of 3+ characters outside the control and \x7F-\xFF ranges.
    printable_run = re.compile("[^\x00-\x1F\x7F-\xFF]{3,}")

    for offset, _orig_len, data in _objects:
        _object = collections.OrderedDict()
        _object["obj_offset"] = '0x%08X' % offset

        try:
            _oleobj = oleobj.OleObject()
            _oleobj.parse(data)
            _object["ole_type"] = str(_oleobj.class_name)
            _object["ole_size"] = _oleobj.data_size
        except Exception:
            # Best effort: the object may not be a parsable OLE object.
            _object["ole_type"] = ""
            _object["ole_size"] = ""

        _object["obj_size"] = len(data)
        _object["obj_sig"] = str(data[:self.obj_sig_len])

        try:
            data_str = data.decode(errors='ignore')
            # dict.fromkeys removes duplicates while keeping first-seen
            # order, so the joined result is stable between runs.
            unique_strings = "\r".join(
                dict.fromkeys(printable_run.findall(data_str)))
        except Exception:
            unique_strings = ""
        _object["ole_strings"] = unique_strings

        # Scan the data object content
        _object["ole_yara_sig"] = task.scanner["yara"].scan_buffer(data)
        _object["ole_regex_strings"] = task.scanner["regex"].ioc_scan(
            unique_strings)

        # Update Task's IOCs
        task.update_ioc(_object["ole_regex_strings"])

        output.append(_object)

    # Properly close the task before returning from the function
    task_data["objects"] = output
    self.end(task, task_data)