Example #1
0
    def _parse(self, filepath):
        """Parses an office document for static information.
        Currently (as per olefile) the following formats are supported:
        - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
        - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
        - PowerPoint 2007+ (.pptm, .ppsm)

        The returned dict always has a "Metadata" key (including "HasMacros"
        set to "Yes" or "No" and, when oleid recognises the file,
        "DocumentType"). When VBA macros are detected a "Macro" key is added
        with the macro source under "Code" and the non-empty detection
        categories under "Analysis".

        @param filepath: Path to the file to be analyzed.
        @return: results dict
        """
        results = {}
        vba = VBA_Parser(filepath)
        try:
            results["Metadata"] = {}
            # The bulk of the metadata checks are in the OLE Structures
            # So don't check if we're dealing with XML.
            if olefile.isOleFile(filepath):
                ole = olefile.OleFileIO(filepath)
                try:
                    results["Metadata"] = ole.get_metadata().get_meta()
                    # Fix up some output formatting
                    summary = results["Metadata"]["SummaryInformation"]
                    for field in ("create_time", "last_saved_time"):
                        summary[field] = self.convert_dt_string(summary[field])
                finally:
                    # Release the file handle even if metadata parsing fails.
                    ole.close()

            if vba.detect_vba_macros():
                results["Metadata"]["HasMacros"] = "Yes"
                # Create the category lists before processing the macro(s)
                # so that findings from multiple macros in a single file
                # accumulate instead of overwriting each other.
                code = {}
                analysis = {
                    "AutoExec": [],
                    "Suspicious": [],
                    "IOCs": [],
                    "HexStrings": [],
                }
                results["Macro"] = {"Code": code, "Analysis": analysis}

                ctr = 0
                for (_subfilename, _stream_path, vba_filename,
                     vba_code) in vba.extract_macros():
                    vba_code = filter_vba(vba_code)
                    if vba_code.strip() == "":
                        # Nothing left after filtering; skip this stream.
                        continue
                    ctr += 1
                    code["Macro" + str(ctr)] = [
                        (convert_to_printable(vba_filename),
                         convert_to_printable(vba_code))
                    ]
                    findings = (
                        ("AutoExec", detect_autoexec(vba_code)),
                        ("Suspicious", detect_suspicious(vba_code)),
                        ("IOCs", vbadeobf.parse_macro(vba_code)),
                        ("HexStrings", detect_hex_strings(vba_code)),
                    )
                    for category, hits in findings:
                        if hits:
                            analysis[category].extend(
                                (first, second) for first, second in hits)

                # Delete any keys which had no results. Otherwise we pollute
                # the Django interface with null data.
                for category in list(analysis):
                    if not analysis[category]:
                        del analysis[category]
            else:
                results["Metadata"]["HasMacros"] = "No"
        finally:
            # The parser keeps the document open; always release it.
            vba.close()

        document_types = (
            "Word Document",
            "Excel Workbook",
            "PowerPoint Presentation",
        )
        for indicator in OleID(filepath).check():
            # Keep the explicit comparison with True: only a value equal to
            # True marks the type as detected.
            if indicator.name in document_types and indicator.value == True:
                results["Metadata"]["DocumentType"] = indicator.name

        return results
Example #2
0
    def _parse(self, filepath):
        """Parses an office document for static information.
        Currently (as per olefile) the following formats are supported:
        - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
        - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
        - PowerPoint 2007+ (.pptm, .ppsm)

        If the VBA parser cannot be created for the file, an empty dict is
        returned. Otherwise the dict always has a "Metadata" key (including
        "HasMacros" set to "Yes" or "No" and, when oleid recognises the file,
        "DocumentType"). When VBA macros are detected a "Macro" key is added
        with the macro source under "Code" and the non-empty detection
        categories under "Analysis".

        @param filepath: Path to the file to be analyzed.
        @return: results dict (empty when the file cannot be opened by the
                 VBA parser)
        """
        results = {}
        try:
            vba = VBA_Parser(filepath)
        except Exception:
            # Unsupported or malformed file: report nothing rather than fail.
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            return results

        try:
            results["Metadata"] = {}
            # The bulk of the metadata checks are in the OLE Structures
            # So don't check if we're dealing with XML.
            if olefile.isOleFile(filepath):
                ole = olefile.OleFileIO(filepath)
                try:
                    results["Metadata"] = ole.get_metadata().get_meta()
                    # Fix up some output formatting
                    summary = results["Metadata"]["SummaryInformation"]
                    for field in ("create_time", "last_saved_time"):
                        summary[field] = self.convert_dt_string(summary[field])
                finally:
                    # Release the file handle even if metadata parsing fails.
                    ole.close()

            if vba.detect_vba_macros():
                results["Metadata"]["HasMacros"] = "Yes"
                # Create the category lists before processing the macro(s)
                # so that findings from multiple macros in a single file
                # accumulate instead of overwriting each other.
                code = {}
                analysis = {
                    "AutoExec": [],
                    "Suspicious": [],
                    "IOCs": [],
                    "HexStrings": [],
                }
                results["Macro"] = {"Code": code, "Analysis": analysis}

                ctr = 0
                for (_subfilename, _stream_path, vba_filename,
                     vba_code) in vba.extract_macros():
                    vba_code = filter_vba(vba_code)
                    if vba_code.strip() == '':
                        # Nothing left after filtering; skip this stream.
                        continue
                    ctr += 1
                    code["Macro" + str(ctr)] = [
                        (convert_to_printable(vba_filename),
                         convert_to_printable(vba_code))
                    ]
                    findings = (
                        ("AutoExec", detect_autoexec(vba_code)),
                        ("Suspicious", detect_suspicious(vba_code)),
                        ("IOCs", vbadeobf.parse_macro(vba_code)),
                        ("HexStrings", detect_hex_strings(vba_code)),
                    )
                    for category, hits in findings:
                        if hits:
                            analysis[category].extend(
                                (first, second) for first, second in hits)

                # Delete any keys which had no results. Otherwise we pollute
                # the Django interface with null data.
                for category in list(analysis):
                    if not analysis[category]:
                        del analysis[category]
            else:
                results["Metadata"]["HasMacros"] = "No"
        finally:
            # The parser keeps the document open; always release it.
            vba.close()

        document_types = (
            "Word Document",
            "Excel Workbook",
            "PowerPoint Presentation",
        )
        for indicator in OleID(filepath).check():
            # Keep the explicit comparison with True: only a value equal to
            # True marks the type as detected.
            if indicator.name in document_types and indicator.value == True:
                results["Metadata"]["DocumentType"] = indicator.name

        return results