def unpack(self, data):
    """
    Carve every object embedded in the RTF document `data`.

    One `UnpackResult` is yielded per object found by `RtfObjParser`. The
    result path is the object's own file name when it has one, otherwise a
    zero-padded `carveN.bin` name. The payload is the innermost data that is
    available (OLE package data, then OLE data, then the raw object data) and
    whatever precedes it in the enclosing layer is exposed as `ole_header` /
    `raw_header` metadata.
    """
    rtf = RtfObjParser(data)
    rtf.parse()
    carved = rtf.objects
    # Pad the running number so that all generated names have equal length.
    digits = len(str(len(carved)))
    ole_type_names = {
        OleObject.TYPE_EMBEDDED: 'EMBEDDED',
        OleObject.TYPE_LINKED: 'LINKED',
    }
    for index, obj in enumerate(carved):
        name = obj.filename or F'carve{index:0{digits}}.bin'
        payload = obj.rawdata
        meta = {}
        if obj.is_ole:
            ole_type = ole_type_names.get(obj.format_id)
            if ole_type is not None:
                meta['ole_type'] = ole_type
            if obj.is_package:
                meta.update(src_path=obj.src_path, tmp_path=obj.temp_path)
            if obj.clsid is not None:
                meta.update(ole_info=obj.clsid_desc, ole_guid=obj.clsid)
            meta['ole_name'] = obj.class_name
        if obj.oledata:
            payload = obj.oledata
            offset = obj.rawdata.find(payload)
            # Only a non-empty prefix is worth reporting as a header.
            if offset > 0:
                meta['raw_header'] = obj.rawdata[:offset]
            if obj.olepkgdata:
                payload = obj.olepkgdata
                offset = obj.oledata.find(payload)
                if offset >= 0:
                    meta['ole_header'] = obj.oledata[:offset]
        yield UnpackResult(name, payload, **meta)
def save_ole_objects(self, data, save_object, filename):
    '''
    Save one object carved from an RTF document to a temporary file and
    open that file as a new session.

    The bulk of this function is taken from python-oletools:
    https://github.com/decalage2/oletools/blob/master/oletools/rtfobj.py
    See link for license

    :param data: raw content of the RTF document.
    :param save_object: index of the object to save (anything ``int()``
        accepts); an invalid or out-of-range value is logged as an error
        and nothing is saved.
    :param filename: not used by this method.
    '''
    rtfp = RtfObjParser(data)
    rtfp.parse()

    try:
        index = int(save_object)
        rtfobj = rtfp.objects[index]
    except (TypeError, ValueError, IndexError) as ex:
        self.log(
            'error',
            'The -s option must be followed by an object index, such as "-s 2"\n{ex}'
            .format(ex=ex))
        return

    # Pick the innermost payload that is available and prepare the matching
    # log lines. `index` is the index requested by the user, so the messages
    # name the object that is really being saved.
    if rtfobj.is_package:
        details = [
            'Saving file from OLE Package in object #%d:' % index,
            ' Filename = %r' % rtfobj.filename,
            ' Source path = %r' % rtfobj.src_path,
            ' Temp path = %r' % rtfobj.temp_path,
        ]
        saving = ' saving to file %s'
        digest = rtfobj.olepkgdata_md5
        payload = rtfobj.olepkgdata
    # When format_id=TYPE_LINKED, oledata_size=None
    elif rtfobj.is_ole and rtfobj.oledata_size is not None:
        details = [
            'Saving file embedded in OLE object #%d:' % index,
            ' format_id = %d' % rtfobj.format_id,
            ' class name = %r' % rtfobj.class_name,
            ' data size = %d' % rtfobj.oledata_size,
        ]
        saving = ' saving to file %s'
        digest = rtfobj.oledata_md5
        payload = rtfobj.oledata
    else:
        details = ['Saving raw data in object #%d:' % index]
        saving = ' saving object to file %s'
        digest = rtfobj.rawdata_md5
        payload = rtfobj.rawdata

    # delete=False: the file has to outlive this method because the new
    # session is opened on it. The context manager still guarantees that the
    # handle is closed, even when writing fails.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        for line in details:
            self.log('info', line)
        self.log('info', saving % tmp.name)
        self.log('info', ' md5 %s' % digest)
        tmp.write(payload)

    if save_object != 'all':
        __sessions__.new(tmp.name)
def run(self):
    """Collect metadata about the OLE objects embedded in the job's RTF sample.

    Returns:
        dict: ``{"rtfobj": info}`` where ``info["ole_objects"]`` holds one
        dict per well-formed OLE object. ``info`` additionally carries
        ``"exploit_ole2link_vuln"`` and/or ``"exploit_equation_editor"``
        set to ``True`` when an object with the matching class name is seen.
    """
    binary = get_binary(self.job_id)
    rtfp = RtfObjParser(binary)
    rtfp.parse()

    ole_objects = []
    rtfobj_results = {"ole_objects": ole_objects}
    for rtfobj in rtfp.objects:
        if not rtfobj.is_ole:
            continue

        # The class name is read from the analysed document and need not be
        # valid UTF-8; a strict decode would abort the whole analysis.
        class_name = rtfobj.class_name.decode(errors="replace")
        ole_dict = {
            "format_id": rtfobj.format_id,
            "class_name": class_name,
            "ole_datasize": rtfobj.oledata_size,
        }
        if rtfobj.is_package:
            ole_dict["is_package"] = True
            ole_dict["filename"] = rtfobj.filename
            ole_dict["src_path"] = rtfobj.src_path
            ole_dict["temp_path"] = rtfobj.temp_path
            ole_dict["olepkgdata_md5"] = rtfobj.olepkgdata_md5
        else:
            ole_dict["ole_md5"] = rtfobj.oledata_md5
        if rtfobj.clsid:
            ole_dict["clsid_desc"] = rtfobj.clsid_desc
            ole_dict["clsid_id"] = rtfobj.clsid
        ole_objects.append(ole_dict)

        # http://www.kb.cert.org/vuls/id/921560
        if class_name == "OLE2Link":
            rtfobj_results["exploit_ole2link_vuln"] = True
        # https://www.kb.cert.org/vuls/id/421280/
        elif class_name.lower() == "equation.3":
            rtfobj_results["exploit_equation_editor"] = True

    return {"rtfobj": rtfobj_results}
def analyze_objects(self, path):
    """Add one result subsection per object embedded in the RTF file at `path`.

    Well-formed OLE objects are reported with their type, class name, CLSID
    and package paths, and are rated 'info', 'suspicious' or 'malicious'.
    Anything else is reported as '(Non) OLE object'.

    :param path: path of the RTF file to analyse.
    """
    with io.open(path, 'rb') as fh:
        data = fh.read()

    parser = RtfObjParser(data)
    parser.parse()
    for idx, rtfobj in enumerate(parser.objects):
        if not rtfobj.is_ole:
            self.add_result_subsection(
                '(Non) OLE object #{}'.format(idx),
                {
                    # hex() so that the digits really are hexadecimal, like
                    # the 'address' reported for OLE objects.
                    'index': hex(rtfobj.start),
                    'class': 'info',
                    'type': 'Not a valid OLE object',
                }
            )
            continue

        if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
            obj_type = '{} (Embedded)'.format(rtfobj.format_id)
        elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
            obj_type = '{} (Linked)'.format(rtfobj.format_id)
        else:
            obj_type = '{} (Unknown)'.format(rtfobj.format_id)

        if rtfobj.is_package:
            obj_html_class = 'suspicious'
            _, ext = os.path.splitext(rtfobj.filename)
            if re_executable_extensions.match(ext):
                obj_html_class = 'malicious'
        else:
            obj_html_class = 'info'

        # Start from the defaults so that both names are always bound, also
        # when a CLSID is present without a description.
        obj_clsid = 'Not available'
        obj_clsid_desc = 'No CLSID related description available.'
        try:
            if rtfobj.clsid:
                obj_clsid = rtfobj.clsid
                if rtfobj.clsid_desc:
                    obj_clsid_desc = rtfobj.clsid_desc
                    if 'CVE' in obj_clsid_desc:
                        obj_html_class = 'malicious'
        except AttributeError:
            # The object does not expose CLSID attributes at all.
            obj_clsid = 'Not available'
            obj_clsid_desc = 'No CLSID related description available.'

        # Decode byte strings instead of using str(), which would render
        # them as "b'...'" on Python 3.
        class_name = rtfobj.class_name
        if isinstance(class_name, bytes):
            class_name = class_name.decode('utf-8', 'replace')
        else:
            class_name = str(class_name)

        if 'equation' in class_name.lower():
            obj_clsid_desc += ' (The class name suggests an Equation Editor referencing OLE object.)'
            obj_html_class = 'malicious'

        self.add_result_subsection(
            'OLE object #{}'.format(idx),
            {
                'address': '{}'.format(hex(rtfobj.start)),
                'class': obj_html_class,
                'type': obj_type,
                'filename': rtfobj.filename if rtfobj.filename else 'Not available',
                'classname': class_name if rtfobj.class_name else 'Not available',
                'size': rtfobj.oledata_size,
                'clsid': obj_clsid,
                'clsid_description': obj_clsid_desc,
                'source_path': rtfobj.src_path if rtfobj.src_path else 'Not available',
                'temp_path': rtfobj.temp_path if rtfobj.temp_path else 'Not available'
            }
        )
def run(analyzer_name, job_id, filepath, filename, md5, additional_config_params):
    """Run the rtfobj analyzer for one job and return its report.

    Args:
        analyzer_name: name used for the report template and in log messages.
        job_id: identifier of the job whose binary is analysed.
        filepath: not used by this analyzer.
        filename: sample name, used in error messages only.
        md5: sample hash, used in error messages only.
        additional_config_params: not used by this analyzer.

    Returns:
        The report dict. ``report["report"]["rtfobj"]`` holds the OLE object
        metadata on success; on failure ``report["errors"]`` is extended and
        ``report["success"]`` is ``False``. No exception is propagated from
        the analysis itself.
    """
    logger.info("started analyzer {} job_id {}".format(analyzer_name, job_id))
    report = general.get_basic_report_template(analyzer_name)
    try:
        binary = general.get_binary(job_id)
        rtfp = RtfObjParser(binary)
        rtfp.parse()

        ole_objects = []
        rtfobj_results = {"ole_objects": ole_objects}
        for rtfobj in rtfp.objects:
            if not rtfobj.is_ole:
                continue

            # The class name is read from the analysed document and need not
            # be valid UTF-8; a strict decode would fail the whole report.
            class_name = rtfobj.class_name.decode(errors="replace")
            ole_dict = {
                "format_id": rtfobj.format_id,
                "class_name": class_name,
                "ole_datasize": rtfobj.oledata_size,
            }
            if rtfobj.is_package:
                ole_dict["is_package"] = True
                ole_dict["filename"] = rtfobj.filename
                ole_dict["src_path"] = rtfobj.src_path
                ole_dict["temp_path"] = rtfobj.temp_path
                ole_dict["olepkgdata_md5"] = rtfobj.olepkgdata_md5
            else:
                ole_dict["ole_md5"] = rtfobj.oledata_md5
            if rtfobj.clsid:
                ole_dict["clsid_desc"] = rtfobj.clsid_desc
                ole_dict["clsid_id"] = rtfobj.clsid
            ole_objects.append(ole_dict)

            # http://www.kb.cert.org/vuls/id/921560
            if class_name == "OLE2Link":
                rtfobj_results["exploit_ole2link_vuln"] = True
            # https://www.kb.cert.org/vuls/id/421280/
            elif class_name.lower() == "equation.3":
                rtfobj_results["exploit_equation_editor"] = True

        report["report"] = {"rtfobj": rtfobj_results}
    except AnalyzerRunException as e:
        error_message = (
            "job_id:{} analyzer:{} md5:{} filename: {} Analyzer Error {}"
            "".format(job_id, analyzer_name, md5, filename, e))
        logger.error(error_message)
        report["errors"].append(error_message)
        report["success"] = False
    except Exception as e:
        # Top-level boundary of the analyzer: record the failure in the
        # report instead of letting it propagate.
        traceback.print_exc()
        error_message = (
            "job_id:{} analyzer:{} md5:{} filename: {} Unexpected Error {}"
            "".format(job_id, analyzer_name, md5, filename, e))
        logger.exception(error_message)
        report["errors"].append(str(e))
        report["success"] = False
    else:
        report["success"] = True

    general.set_report_and_cleanup(job_id, report)

    logger.info("ended analyzer {} job_id {}".format(analyzer_name, job_id))

    return report
def parse_rtf(self, filename, data):
    '''
    Log a table describing every object embedded in an RTF document.

    The bulk of this function is taken from python-oletools:
    https://github.com/decalage2/oletools/blob/master/oletools/rtfobj.py
    See link for license

    :param filename: name of the document, used in the summary line only.
    :param data: raw content of the RTF document.
    '''
    self.log('success', 'File: {name} - size: {size} bytes'.format(name=filename, size=hex(len(data))))
    table = []
    header = ['id', 'index', 'OLE Object']

    rtfp = RtfObjParser(data)
    rtfp.parse()
    for obj_id, rtfobj in enumerate(rtfp.objects):
        obj_col = []
        if rtfobj.is_ole:
            obj_col.append('format_id: {id} '.format(id=rtfobj.format_id))
            if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
                obj_col.append('(Embedded)')
            elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
                obj_col.append('(Linked)')
            else:
                obj_col.append('(Unknown)')
            obj_col.append('class name: {cls}'.format(cls=rtfobj.class_name))
            # if the object is linked and not embedded, data_size=None:
            if rtfobj.oledata_size is None:
                obj_col.append('data size: N/A')
            else:
                obj_col.append('data size: %d' % rtfobj.oledata_size)
            if rtfobj.is_package:
                obj_col.append('OLE Package object:')
                obj_col.append('Filename: {name}'.format(name=rtfobj.filename))
                obj_col.append('Source path: {path}'.format(path=rtfobj.src_path))
                obj_col.append('Temp path = {path}'.format(path=rtfobj.temp_path))
                obj_col.append('MD5 = {md5}'.format(md5=rtfobj.olepkgdata_md5))
                # check whether the file extension was changed on the way:
                _, temp_ext = os.path.splitext(rtfobj.temp_path)
                self.log('debug', 'Temp path extension: {ext}'.format(ext=temp_ext))
                _, file_ext = os.path.splitext(rtfobj.filename)
                # self.log is called as self.log(level, message) throughout;
                # it has no `.debug` attribute.
                self.log('debug', 'File extension: %r' % file_ext)
                if temp_ext != file_ext:
                    obj_col.append("MODIFIED FILE EXTENSION")
            else:
                obj_col.append('MD5 = {md5}'.format(md5=rtfobj.oledata_md5))
            if rtfobj.clsid is not None:
                obj_col.append('CLSID: {clsid}'.format(clsid=rtfobj.clsid))
                obj_col.append(rtfobj.clsid_desc)
            # Detect OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            if rtfobj.class_name == b'OLE2Link':
                obj_col.append('Possibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)')
            # Detect Equation Editor exploit
            # https://www.kb.cert.org/vuls/id/421280/
            elif rtfobj.class_name.lower() == b'equation.3':
                obj_col.append('Possibly an exploit for the Equation Editor vulnerability (VU#421280, CVE-2017-11882)')
        else:
            obj_col.append('Not a well-formed OLE object')

        # enumerate() supplies the position directly; no list search needed.
        table.append([obj_id, '%08Xh' % rtfobj.start, '\n'.join(obj_col)])

    self.log('table', dict(rows=table, header=header))
def _parse_rtf(self, data: bytes) -> Dict[str, list]:
    """Carve the objects embedded in an RTF document and save them to disk.

    Each object is written below
    ``CUCKOO_ROOT/storage/analyses/<task_id>/rtf_objects`` in a file named
    after the SHA-256 of the saved data: the OLE package data if the object
    is a package, else the OLE data if it has any, else the raw object data.

    Args:
        data: raw content of the RTF document.

    Returns:
        A dict mapping ``str(format_id)`` to the list of descriptions of the
        objects that have this format id. Each description has the keys
        ``class_name``, ``size``, ``filename``, ``type_embed``, ``CVE``,
        ``sha256`` and ``index``.
    """
    results = {}
    rtfp = RtfObjParser(data)
    rtfp.parse()
    save_dir = os.path.join(CUCKOO_ROOT, "storage", "analyses", self.task_id, "rtf_objects")
    if rtfp.objects:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)

    ole_object_mapping = {
        oleobj.OleObject.TYPE_EMBEDDED: "Embedded",
        oleobj.OleObject.TYPE_LINKED: "Linked",
    }

    # `number` is the position of the object in the document. It is what the
    # "object #%d" debug messages refer to; format_id is not a number for
    # objects that are not OLE.
    for number, rtfobj in enumerate(rtfp.objects):
        temp_dict = {
            "class_name": "",
            "size": "",
            "filename": "",
            "type_embed": "",
            "CVE": "",
            "sha256": "",
            "index": "",
        }

        if rtfobj.is_package:
            log.debug("Saving file from OLE Package in object #%d:", number)
            log.debug(" Filename = %s", rtfobj.filename)
            log.debug(" Source path = %s", rtfobj.src_path)
            log.debug(" Temp path = %s", rtfobj.temp_path)
            payload = rtfobj.olepkgdata
            sha256 = hashlib.sha256(payload).hexdigest()
            temp_dict["filename"] = convert_to_printable(rtfobj.filename) if rtfobj.filename else sha256
            temp_dict["size"] = len(payload)
            log.debug(" Saving to file %s", sha256)
        # When format_id is TYPE_LINKED, oledata_size is None
        elif rtfobj.is_ole and rtfobj.oledata_size is not None:
            temp_dict["type_embed"] = ole_object_mapping.get(rtfobj.format_id, "Unknown")
            if getattr(rtfobj, "clsid", None) is not None and "CVE" in rtfobj.clsid_desc:
                temp_dict["CVE"] = rtfobj.clsid_desc
            # Detect OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            if rtfobj.class_name == b"OLE2Link":
                temp_dict["CVE"] = "Possibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)"
            log.debug("Saving file embedded in OLE object #%d:", number)
            log.debug(" format_id = %d", rtfobj.format_id)
            log.debug(" class name = %s", rtfobj.class_name)
            log.debug(" data size = %d", rtfobj.oledata_size)
            # Drop every non-ASCII byte before making the name printable.
            class_name = rtfobj.class_name.decode("ascii", "ignore").encode("ascii")
            temp_dict["class_name"] = convert_to_printable(class_name)
            temp_dict["size"] = rtfobj.oledata_size
            # set a file extension according to the class name:
            class_name = rtfobj.class_name.lower()
            if class_name.startswith(b"word"):
                ext = "doc"
            elif class_name.startswith(b"package"):
                ext = "package"
            else:
                ext = "bin"
            payload = rtfobj.oledata
            sha256 = hashlib.sha256(payload).hexdigest()
            temp_dict["filename"] = f"object_{rtfobj.start:08X}.{ext}"
            log.debug(" Saving to file %s", sha256)
        else:
            log.debug("Saving raw data in object #%d:", number)
            payload = rtfobj.rawdata
            sha256 = hashlib.sha256(payload).hexdigest()
            temp_dict["filename"] = f"object_{rtfobj.start:08X}.raw"
            temp_dict["size"] = len(payload)
            log.debug(" Saving object to file %s", sha256)

        with open(os.path.join(save_dir, sha256), "wb") as f:
            f.write(payload)
        temp_dict["sha256"] = sha256
        temp_dict["index"] = f"{rtfobj.start:08X}h"
        results.setdefault(str(rtfobj.format_id), []).append(temp_dict)

    return results
def module_rtf(file, md5, tool, args):
    """Scan an RTF file for embedded OLE objects, packages and shellcode.

    Findings are printed through `print_output` and summarised in the
    module-level `output` dict under the key `module`.

    :param file: name of the file, relative to `filescanner_proc_dir`.
    :param md5: hash of the file, stored in the result and passed to the
        YARA scan.
    :param tool: passed through to `filescan.yara_scan`.
    :param args: passed through to `filescan.yara_scan`.
    :return: the module-level `output` dict.
    """
    has_objects = False
    objects_result = 'No embedded objects.'
    has_shellcode = False
    shellcode_result = 'No shellcode.'
    has_ole = False
    ole_result = 'No OLE package.'
    has_pe = False
    pe_result = 'No executable.'
    indicators = []

    print_output(file, '\n\n-----------------------------------------\n[Scanning for embedded objects in RTF]\n-----------------------------------------\n', 'text')

    # Read file for parsing; the context manager closes the handle again.
    with open(filescanner_proc_dir+file, 'rb') as handle:
        data = handle.read()

    rtfp = RtfObjParser(data)
    rtfp.parse()
    for rtfobj in rtfp.objects:
        if not rtfobj.is_ole:
            print_output(file, '[-] Not a well-formed OLE object', 'text')
            continue

        print_output(file, '[-] FOUND OLE OBJECT format_id: {} class name: {} size: {}'.format(rtfobj.format_id, rtfobj.class_name, rtfobj.oledata_size), 'shell')
        has_objects = True
        objects_result = 'EMBEDDED OBJECTS FOUND.'
        indicators.append(rtfobj.class_name)

        if not rtfobj.is_package:
            print_output(file, '[-] Not an OLE Package', 'text')
            continue

        print_output(file, '[-] OLE PACKAGE filename: {} source path: {} temp path: {}'.format(rtfobj.filename, rtfobj.src_path, rtfobj.temp_path), 'shell')
        has_ole = True
        ole_result = 'OLE PACKAGE'
        indicators.append(rtfobj.filename)

        # Check if the file extension is executable:
        _, ext = os.path.splitext(rtfobj.filename)
        if re_executable_extensions.match(ext):
            print_output(file, '[!] EXECUTABLE FILE', 'shell')
            has_pe = True
            pe_result = 'EXECUTABLE.'

    rtfdump_scan(file, '-i -f O')

    if filescan.yara_scan(file, md5, tool, args, filescanner_proc_dir):
        has_shellcode = True
        shellcode_result = 'SHELLCODE FOUND.'

    # Determine risk level and overall results
    if has_objects and (has_ole or has_shellcode or has_pe):
        risk = 'High Risk'
    elif has_objects:
        risk = 'Medium Risk'
    else:
        # Without this branch `risk` was unbound for files that contain no
        # OLE object and building the result raised UnboundLocalError.
        # NOTE(review): 'Low Risk' is an assumed label - confirm that it
        # matches what the other scanner modules report.
        risk = 'Low Risk'

    result = objects_result + ' ' + ole_result + ' ' + shellcode_result + ' ' + pe_result

    ##
    # OUTPUT
    ##
    output[module] = {'result': result, 'risk': risk, 'indicators': indicators}
    output[module]['additional_info'] = {'md5': md5}

    return output
def analyze_objects(self, path):
    """Add one result subsection per object embedded in the RTF file at `path`.

    Well-formed OLE objects are reported with their type, class name, CLSID
    and package paths, and are rated "info", "suspicious" or "malicious".
    Anything else is reported as "(Non) OLE object".

    :param path: path of the RTF file to analyse.
    """
    with io.open(path, "rb") as fh:
        data = fh.read()

    parser = RtfObjParser(data)
    parser.parse()
    for idx, rtfobj in enumerate(parser.objects):
        if not rtfobj.is_ole:
            self.add_result_subsection(
                "(Non) OLE object #{}".format(idx),
                {
                    # hex() so that the digits really are hexadecimal, like
                    # the "address" reported for OLE objects.
                    "index": hex(rtfobj.start),
                    "class": "info",
                    "type": "Not a valid OLE object",
                },
            )
            continue

        if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
            obj_type = "{} (Embedded)".format(rtfobj.format_id)
        elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
            obj_type = "{} (Linked)".format(rtfobj.format_id)
        else:
            obj_type = "{} (Unknown)".format(rtfobj.format_id)

        if rtfobj.is_package:
            obj_html_class = "suspicious"
            _, ext = os.path.splitext(rtfobj.filename)
            if re_executable_extensions.match(ext):
                obj_html_class = "malicious"
        else:
            obj_html_class = "info"

        # Start from the defaults so that both names are always bound, also
        # when a CLSID is present without a description.
        obj_clsid = "Not available"
        obj_clsid_desc = "No CLSID related description available."
        try:
            if rtfobj.clsid:
                obj_clsid = rtfobj.clsid
                if rtfobj.clsid_desc:
                    obj_clsid_desc = rtfobj.clsid_desc
                    if "CVE" in obj_clsid_desc:
                        obj_html_class = "malicious"
        except AttributeError:
            # The object does not expose CLSID attributes at all.
            obj_clsid = "Not available"
            obj_clsid_desc = "No CLSID related description available."

        # Decode byte strings instead of using str(), which would render
        # them as "b'...'" on Python 3.
        class_name = rtfobj.class_name
        if isinstance(class_name, bytes):
            class_name = class_name.decode("utf-8", "replace")
        else:
            class_name = str(class_name)

        if "equation" in class_name.lower():
            obj_clsid_desc += " (The class name suggests an Equation Editor referencing OLE object.)"
            obj_html_class = "malicious"

        self.add_result_subsection(
            "OLE object #{}".format(idx),
            {
                "address": "{}".format(hex(rtfobj.start)),
                "class": obj_html_class,
                "type": obj_type,
                "filename": rtfobj.filename if rtfobj.filename else "Not available",
                "classname": class_name if rtfobj.class_name else "Not available",
                "size": rtfobj.oledata_size,
                "clsid": obj_clsid,
                "clsid_description": obj_clsid_desc,
                "source_path": rtfobj.src_path if rtfobj.src_path else "Not available",
                "temp_path": rtfobj.temp_path if rtfobj.temp_path else "Not available",
            },
        )
def analyze_objects(self, path):
    """Add one result subsection per object embedded in the RTF file at `path`.

    Well-formed OLE objects are reported with their type, class name, CLSID
    and package paths, and are rated 'info', 'suspicious' or 'malicious'.
    Anything else is reported as '(Non) OLE object'.

    :param path: path of the RTF file to analyse.
    """
    with io.open(path, 'rb') as fh:
        data = fh.read()

    parser = RtfObjParser(data)
    parser.parse()
    for idx, rtfobj in enumerate(parser.objects):
        if not rtfobj.is_ole:
            self.add_result_subsection(
                '(Non) OLE object #{}'.format(idx), {
                    # hex() so that the digits really are hexadecimal, like
                    # the 'address' reported for OLE objects.
                    'index': hex(rtfobj.start),
                    'class': 'info',
                    'type': 'Not a valid OLE object',
                })
            continue

        if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
            obj_type = '{} (Embedded)'.format(rtfobj.format_id)
        elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
            obj_type = '{} (Linked)'.format(rtfobj.format_id)
        else:
            obj_type = '{} (Unknown)'.format(rtfobj.format_id)

        if rtfobj.is_package:
            obj_html_class = 'suspicious'
            _, ext = os.path.splitext(rtfobj.filename)
            if re_executable_extensions.match(ext):
                obj_html_class = 'malicious'
        else:
            obj_html_class = 'info'

        # Start from the defaults so that both names are always bound, also
        # when a CLSID is present without a description.
        obj_clsid = 'Not available'
        obj_clsid_desc = 'No CLSID related description available.'
        try:
            if rtfobj.clsid:
                obj_clsid = rtfobj.clsid
                if rtfobj.clsid_desc:
                    obj_clsid_desc = rtfobj.clsid_desc
                    if 'CVE' in obj_clsid_desc:
                        obj_html_class = 'malicious'
        except AttributeError:
            # The object does not expose CLSID attributes at all.
            obj_clsid = 'Not available'
            obj_clsid_desc = 'No CLSID related description available.'

        # Decode byte strings instead of using str(), which would render
        # them as "b'...'" on Python 3.
        class_name = rtfobj.class_name
        if isinstance(class_name, bytes):
            class_name = class_name.decode('utf-8', 'replace')
        else:
            class_name = str(class_name)

        if 'equation' in class_name.lower():
            obj_clsid_desc += ' (The class name suggests an Equation Editor referencing OLE object.)'
            obj_html_class = 'malicious'

        self.add_result_subsection(
            'OLE object #{}'.format(idx), {
                'address': '{}'.format(hex(rtfobj.start)),
                'class': obj_html_class,
                'type': obj_type,
                'filename': rtfobj.filename if rtfobj.filename else 'Not available',
                'classname': class_name if rtfobj.class_name else 'Not available',
                'size': rtfobj.oledata_size,
                'clsid': obj_clsid,
                'clsid_description': obj_clsid_desc,
                'source_path': rtfobj.src_path if rtfobj.src_path else 'Not available',
                'temp_path': rtfobj.temp_path if rtfobj.temp_path else 'Not available'
            })
def get_rtf_objects():
    """Describe the objects embedded in the RTF document stored at ``/sample``.

    Returns:
        tuple: ``(out_data, tags)``. ``out_data`` has one dict per object
        with the keys ``id`` (position of the object in the document),
        ``index`` (its start offset) and ``ole_object`` (the OLE details, or
        an ``error`` entry for objects that are not well-formed OLE).
        ``tags`` is a list with ``'ole'`` once per OLE object plus a
        ``'cve-…'`` entry for every CVE that is referenced; it may contain
        duplicates.
    """
    with open('/sample', 'rb') as f:
        data = f.read()

    rtfp = RtfObjParser(data)
    rtfp.parse()

    out_data = []
    tags = []
    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    cve_regex = re.compile(r' CVE-(\d{4}-\d+)')

    for obj_id, rtfobj in enumerate(rtfp.objects):
        if rtfobj.is_ole:
            tags.append('ole')
            ole_column = {'format_id': rtfobj.format_id}
            if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
                ole_column['format_type'] = 'embedded'
            elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
                ole_column['format_type'] = 'linked'
            else:
                ole_column['format_type'] = 'unknown'
            ole_column['class_name'] = rtfobj.class_name

            # if the object is linked and not embedded, data_size=None:
            if rtfobj.oledata_size is None:
                ole_column['data_size'] = -1
            else:
                ole_column['data_size'] = rtfobj.oledata_size

            if rtfobj.is_package:
                package = {
                    'filename': rtfobj.filename,
                    'source_path': rtfobj.src_path,
                    'temp_path': rtfobj.temp_path,
                }
                # check if the file extension is executable:
                _, ext = os.path.splitext(rtfobj.filename)
                if re_executable_extensions.match(ext):
                    package['executable'] = True
                ole_column['package'] = package

            if rtfobj.clsid is not None:
                ole_column['CLSID'] = rtfobj.clsid
                ole_column['CLSID_desc'] = rtfobj.clsid_desc
                for match in cve_regex.findall(rtfobj.clsid_desc.upper()):
                    tags.append('cve-{}'.format(match))

            # Detect OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            if rtfobj.class_name == b'OLE2Link':
                ole_column['exploits'] = 'Possibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)'
                tags.append('cve-2017-0199')
        else:
            ole_column = {'error': 'Not a well-formed OLE object'}

        out_data.append({
            # enumerate() supplies the position directly; no list search needed.
            'id': obj_id,
            'index': rtfobj.start,
            'ole_object': ole_column
        })

    return out_data, tags