def _parse(self, filepath):
    """Parses an office document for static information.

    Currently (as per olefile) the following formats are supported:
    - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
    - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
    - PowerPoint 2007+ (.pptm, .ppsm)

    The returned dict always has a "Metadata" entry whose "HasMacros" value
    is "Yes" or "No". When macros are detected a "Macro" entry is added with
    the extracted code under "Code" and the non-empty findings under
    "Analysis". "DocumentType" is set in "Metadata" when a matching OleID
    indicator is found.

    @param filepath: Path to the file to be analyzed.
    @return: results dict
    """
    results = dict()
    vba = VBA_Parser(filepath)
    results["Metadata"] = dict()

    # The bulk of the metadata checks are in the OLE Structures
    # So don't check if we're dealing with XML.
    if olefile.isOleFile(filepath):
        ole = olefile.OleFileIO(filepath)
        try:
            meta = ole.get_metadata()
            results["Metadata"] = meta.get_meta()
            # Fix up some output formatting
            summary = results["Metadata"]["SummaryInformation"]
            for field in ("create_time", "last_saved_time"):
                summary[field] = self.convert_dt_string(summary[field])
        finally:
            # Release the OLE handle even if metadata extraction raises.
            ole.close()

    if vba.detect_vba_macros():
        results["Metadata"]["HasMacros"] = "Yes"
        code = dict()
        # Create IOC and category vars. We do this before processing the
        # macro(s) to avoid overwriting data when there are multiple
        # macros in a single file.
        categories = ("AutoExec", "Suspicious", "IOCs", "HexStrings")
        analysis = dict()
        for category in categories:
            analysis[category] = list()
        results["Macro"] = dict()
        results["Macro"]["Code"] = code
        results["Macro"]["Analysis"] = analysis

        ctr = 0
        for (_subfilename, _stream_path,
             vba_filename, vba_code) in vba.extract_macros():
            vba_code = filter_vba(vba_code)
            if vba_code.strip() == "":
                # Nothing left after filtering; not counted as a macro.
                continue

            # Handle all macros
            ctr += 1
            code["Macro" + str(ctr)] = [
                (convert_to_printable(vba_filename),
                 convert_to_printable(vba_code)),
            ]

            # Run every detector first, then merge the findings into the
            # shared per-file category lists.
            findings = (
                ("AutoExec", detect_autoexec(vba_code)),
                ("Suspicious", detect_suspicious(vba_code)),
                ("IOCs", vbadeobf.parse_macro(vba_code)),
                ("HexStrings", detect_hex_strings(vba_code)),
            )
            for category, found in findings:
                if found:
                    analysis[category].extend(
                        (first, second) for first, second in found
                    )

        # Delete any keys which had no results. Otherwise we pollute the
        # Django interface with null data.
        for category in categories:
            if not analysis[category]:
                del analysis[category]
    else:
        results["Metadata"]["HasMacros"] = "No"

    document_types = (
        "Word Document",
        "Excel Workbook",
        "PowerPoint Presentation",
    )
    for indicator in OleID(filepath).check():
        # NOTE(review): the equality test against True is kept from the
        # original in case indicator values are not plain booleans.
        if indicator.name in document_types and indicator.value == True:
            results["Metadata"]["DocumentType"] = indicator.name

    return results
def _parse(self, filepath):
    """Parses an office document for static information.

    Currently (as per olefile) the following formats are supported:
    - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
    - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
    - PowerPoint 2007+ (.pptm, .ppsm)

    On success the returned dict has a "Metadata" entry whose "HasMacros"
    value is "Yes" or "No". When macros are detected a "Macro" entry is
    added with the extracted code under "Code" and the non-empty findings
    under "Analysis". "DocumentType" is set in "Metadata" when a matching
    OleID indicator is found.

    @param filepath: Path to the file to be analyzed.
    @return: results dict; empty when VBA_Parser cannot be constructed
             for the file.
    """
    results = dict()

    try:
        vba = VBA_Parser(filepath)
    except Exception:
        # Best effort: a file the parser rejects yields an empty result.
        # Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return results

    results["Metadata"] = dict()

    # The bulk of the metadata checks are in the OLE Structures
    # So don't check if we're dealing with XML.
    if olefile.isOleFile(filepath):
        ole = olefile.OleFileIO(filepath)
        try:
            meta = ole.get_metadata()
            results["Metadata"] = meta.get_meta()
            # Fix up some output formatting
            summary = results["Metadata"]["SummaryInformation"]
            for field in ("create_time", "last_saved_time"):
                summary[field] = self.convert_dt_string(summary[field])
        finally:
            # Release the OLE handle even if metadata extraction raises.
            ole.close()

    if vba.detect_vba_macros():
        results["Metadata"]["HasMacros"] = "Yes"
        code = dict()
        # Create IOC and category vars. We do this before processing the
        # macro(s) to avoid overwriting data when there are multiple
        # macros in a single file.
        categories = ("AutoExec", "Suspicious", "IOCs", "HexStrings")
        analysis = dict()
        for category in categories:
            analysis[category] = list()
        results["Macro"] = dict()
        results["Macro"]["Code"] = code
        results["Macro"]["Analysis"] = analysis

        ctr = 0
        for (_subfilename, _stream_path,
             vba_filename, vba_code) in vba.extract_macros():
            vba_code = filter_vba(vba_code)
            if vba_code.strip() == '':
                # Nothing left after filtering; not counted as a macro.
                continue

            # Handle all macros
            ctr += 1
            code["Macro" + str(ctr)] = [
                (convert_to_printable(vba_filename),
                 convert_to_printable(vba_code)),
            ]

            # Run every detector first, then merge the findings into the
            # shared per-file category lists.
            findings = (
                ("AutoExec", detect_autoexec(vba_code)),
                ("Suspicious", detect_suspicious(vba_code)),
                ("IOCs", vbadeobf.parse_macro(vba_code)),
                ("HexStrings", detect_hex_strings(vba_code)),
            )
            for category, found in findings:
                if found:
                    analysis[category].extend(
                        (first, second) for first, second in found
                    )

        # Delete any keys which had no results. Otherwise we pollute the
        # Django interface with null data.
        for category in categories:
            if not analysis[category]:
                del analysis[category]
    else:
        results["Metadata"]["HasMacros"] = "No"

    document_types = (
        "Word Document",
        "Excel Workbook",
        "PowerPoint Presentation",
    )
    for indicator in OleID(filepath).check():
        # NOTE(review): the equality test against True is kept from the
        # original in case indicator values are not plain booleans.
        if indicator.name in document_types and indicator.value == True:
            results["Metadata"]["DocumentType"] = indicator.name

    return results