Exemple #1
0
def process_file_scanexpr (container, filename, data):
    """
    Process a single file

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    print '='*79
    print 'FILE:', display_filename
    all_code = ''
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        print 'Type:', vba.type
        if vba.detect_vba_macros():
            #print 'Contains VBA Macros:'
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print '-'*79
                print 'VBA MACRO %s ' % vba_filename
                print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
                print '- '*39
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print '(empty macro)'
                else:
                    # TODO: option to display code
                    print vba_code_filtered
                    vba_code = vba_collapse_long_lines(vba_code)
                    all_code += '\n' + vba_code
            print '-'*79
            print 'EVALUATED VBA EXPRESSIONS:'
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            print t


        else:
            print 'No VBA macros found.'
    except: #TypeError:
        #raise
        #TODO: print more info if debug mode
        #print sys.exc_value
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    print ''
Exemple #2
0
def parse_stream(subfilename,
                 stream_path=None,
                 vba_filename=None,
                 vba_code=None,
                 strip_useless=False):

    # Are the arguments all in a single tuple?
    if (stream_path is None):
        subfilename, stream_path, vba_filename, vba_code = subfilename

    # Collapse long lines.
    vba_code = vba_collapse_long_lines(vba_code)

    # Filter cruft from the VBA.
    vba_code = filter_vba(vba_code)
    if (strip_useless):
        vba_code = strip_useless_code(vba_code)
    print '-' * 79
    print 'VBA MACRO %s ' % vba_filename
    print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
    print '- ' * 39

    # Parse the macro.
    m = None
    if vba_code.strip() == '':
        print '(empty macro)'
        m = "empty"
    else:
        print '-' * 79
        print 'VBA CODE (with long lines collapsed):'
        print vba_code
        print '-' * 79
        print 'PARSING VBA CODE:'
        try:

            # Enable PackRat for better performance:
            # (see https://pythonhosted.org/pyparsing/pyparsing.ParserElement-class.html#enablePackrat)
            ParserElement.enablePackrat()
            m = module.parseString(vba_code + "\n", parseAll=True)[0]
            m.code = vba_code
        except ParseException as err:
            print err.line
            print " " * (err.column - 1) + "^"
            print err
            print "Parse Error. Processing Aborted."
            return None

    # Return the parsed macro.
    return m
Exemple #3
0
    def extract_macro(self):
        """Extract the filtered VBA macro source and macro analysis of the sample.

        Opens ``self.sample`` with ``olevba.VBA_Parser``. When macros are
        detected, the output of ``analyze_macros()`` is stored under
        ``self.results["analysis"]`` and the concatenation of every macro's
        filtered source under ``self.results["code"]``.

        Returns ``self.results`` when macros were found, ``False`` otherwise.
        """
        vba = olevba.VBA_Parser(self.sample)
        # try/finally so the parser is closed on every exit path, including
        # when extraction or analysis raises part-way through.
        try:
            if not vba.detect_vba_macros():
                return False

            macro_code = "".join(
                olevba.filter_vba(vba_code)
                for (_, _, _, vba_code) in vba.extract_macros()
            )

            self.results["analysis"] = vba.analyze_macros()
            self.results["code"] = macro_code
            return self.results
        finally:
            vba.close()
def parse_stream(subfilename,
                 stream_path=None,
                 vba_filename=None,
                 vba_code=None,
                 strip_useless=False,
                 local_funcs=None):
    """Print and parse the macro code of a single OLE stream.

    @param subfilename (str or tuple) The name of the file containing the
    macros. When stream_path is None this argument is instead unpacked as
    the 4-tuple (subfilename, stream_path, vba_filename, vba_code).

    @param stream_path (??) Stream identifier. Only its repr() is used
    here: to recognize 'xlm_macro' streams and in the printed header.

    @param vba_filename (??) Macro name shown in the printed header.

    @param vba_code (str) The macro code to parse.

    @param strip_useless (boolean) If true, the code is passed through
    core.strip_lines.strip_useless_code() before parsing.

    @param local_funcs (list) A list of the names of already declared
    local VBA functions. Defaults to an empty list.

    @return (Module object, str or None) The parsed module object, the
    string "empty" when the stream is skipped or holds no code, or None
    when parsing fails.

    """

    # Default to no known local functions.
    if local_funcs is None:
        local_funcs = []

    # Bail out early if the analysis limits were already exceeded.
    core.vba_object.limits_exceeded(throw_error=True)

    # A missing stream_path means all arguments came packed in one tuple.
    if stream_path is None:
        (subfilename, stream_path, vba_filename, vba_code) = subfilename

    # Old-style XLM macros are not handled here.
    stream_repr = repr(stream_path)
    if stream_repr.strip() == "'xlm_macro'":
        log.warning("Skipping XLM macro stream...")
        return "empty"

    # Collapse long lines, then filter cruft from the VBA.
    vba_code = filter_vba(core.vba_collapse_long_lines(vba_code))

    # For .hta input, keep only the Visual Basic part of the contents.
    vba_code = get_vb_contents_from_hta(vba_code)

    # Give up on code that looks like garbage characters.
    if read_ole_fields.is_garbage_vba(vba_code, no_html=True):
        log.warning("Failed to extract VBScript from HTA. Skipping.")
        return "empty"

    # olevba hands back XML for some 2007+ streams; skip those.
    if vba_code.strip().startswith("<?xml"):
        log.warning("Skipping XML stream.")
        return "empty"

    # Optionally drop code that does not affect the program's end result.
    if strip_useless:
        vba_code = core.strip_lines.strip_useless_code(vba_code, local_funcs)

    rule = '-'*79
    safe_print(rule)
    safe_print('VBA MACRO %s ' % vba_filename)
    safe_print('in file: %s - OLE stream: %s' % (subfilename, stream_repr))
    safe_print('- '*39)

    # Parse the macro (or flag it as empty).
    if not vba_code.strip():
        safe_print('(empty macro)')
        parsed = "empty"
    else:
        safe_print(rule)
        safe_print('VBA CODE (with long lines collapsed):')
        safe_print(vba_code)
        safe_print(rule)
        safe_print('PARSING VBA CODE:')
        try:
            parsed = core.module.parseString(vba_code + "\n", parseAll=True)[0]
            pyparsing.ParserElement.resetCache()
            parsed.code = vba_code
        except pyparsing.ParseException as err:
            # Show the offending line with a caret under the error column.
            safe_print(err.line)
            safe_print(" "*(err.column-1) + "^")
            safe_print(err)
            log.error("Parse Error. Processing Aborted.")
            return None

    # Parsing can be slow, so check the limits again before returning.
    core.vba_object.limits_exceeded(throw_error=True)

    return parsed
def process_file_scanexpr(container, filename, data):
    """Process a single file.

    Prints each VBA macro found in the file, then a table of the
    expressions reported by core.scan_expressions() together with their
    evaluated values. Exceptions are logged and do not propagate.

    @param container (str) Path and filename of container if the file is within
    a zip archive, None otherwise.

    @param filename (str) path and filename of file on disk, or within
    the container.

    @param data (bytes) Content of the file if it is in a container,
    None if it is a file on disk.

    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    safe_print('='*79)
    safe_print('FILE: ' + safe_str_convert(display_filename))
    # Collapsed code of every non-empty macro, each chunk prefixed by a newline.
    code_chunks = []
    # Stays None if the parser could not be created, so the cleanup below
    # knows there is nothing to close.
    vba = None
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        # Import the submodule explicitly: a bare "import oletools" does not
        # guarantee that oletools.olevba has been loaded.
        import oletools.olevba
        oletools.olevba.enable_logging()
        if (log.getEffectiveLevel() == logging.DEBUG):
            log.debug('opening %r' % filename)
        vba = VBA_Parser(filename, data, relaxed=True)
        if vba.detect_vba_macros():

            # Read in document metadata.
            vm = core.ViperMonkey(filename, data)
            ole = olefile.OleFileIO(filename)
            try:
                vm.set_metadata(ole.get_metadata())
            except Exception as e:
                log.warning("Reading in metadata failed. Trying fallback. " + safe_str_convert(e))
                vm.set_metadata(get_metadata_exif(filename))
            finally:
                # The OLE file is only needed for the metadata read.
                ole.close()

            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code = filter_vba(vba_code)
                safe_print('-'*79)
                safe_print('VBA MACRO %s ' % vba_filename)
                safe_print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                safe_print('- '*39)
                # detect empty macros:
                if vba_code.strip() == '':
                    safe_print('(empty macro)')
                else:
                    # TODO: option to display code
                    safe_print(vba_code)
                    code_chunks.append('\n' + core.vba_collapse_long_lines(vba_code))
            all_code = ''.join(code_chunks)
            safe_print('-'*79)
            safe_print('EVALUATED VBA EXPRESSIONS:')
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in core.scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            # Print the finished table once, after all rows were added (it
            # used to be printed inside the loop: once per row, and not at
            # all when no expression was found).
            safe_print(t)

        else:
            safe_print('No VBA macros found.')
    except Exception as e:
        log.error("Caught exception. " + safe_str_convert(e))
        if (log.getEffectiveLevel() == logging.DEBUG):
            traceback.print_exc()
    finally:
        # Release the parser whether or not processing succeeded.
        if vba is not None:
            vba.close()

    safe_print('')
Exemple #6
0
    def _parse(self, filepath: str) -> Dict[str, Any]:
        """Parses an office document for static information.

        Gathers document metadata, the source of every non-empty VBA macro
        (also written to the task's "macros" folder and scanned with yara),
        macro analysis (IOCs, auto-exec and suspicious keywords, hex
        strings), the document type and, when available, XLM deobfuscation
        output.

        @param filepath: Path to the file to be analyzed.
        @return: results dict; an empty dict when oletools is unavailable or
            VBA_Parser fails with an unexpected exception.
        """

        # NOTE(review): "office_rtf" and "office_dde" are stored in `results`,
        # but the normal exit path returns `officeresults`, so they are not
        # part of the returned value - confirm whether that is intended.
        results = {}
        if not HAVE_OLETOOLS:
            return results

        vba = False
        if is_rtf(filepath):
            try:
                with open(filepath, "rb") as f:
                    contents = f.read()
                temp_results = self._parse_rtf(contents)
                if temp_results:
                    results["office_rtf"] = temp_results
            except Exception as e:
                log.error(e, exc_info=True)
        else:
            try:
                vba = VBA_Parser(filepath)
            except ValueError as e:
                log.error("Error VBA_Parser: %s", str(e))
            except Exception:
                return results
        try:
            # extract DDE
            dde = extract_dde(filepath)
            if dde:
                results["office_dde"] = convert_to_printable(dde)
        except (csv_error, UnicodeDecodeError):
            pass
        except AttributeError:
            log.warning(
                "OleFile library bug: AttributeError! fix: pip3 install -U olefile"
            )
        except Exception as e:
            log.error(e, exc_info=True)

        officeresults = {"Metadata": {}}
        macro_folder = os.path.join(CUCKOO_ROOT, "storage", "analyses",
                                    self.task_id, "macros")
        if olefile.isOleFile(filepath):
            with olefile.OleFileIO(filepath) as ole:
                meta = ole.get_metadata()
                # must be left this way or we won't see the results
                officeresults["Metadata"] = self._get_meta(meta)
        else:
            with contextlib.suppress(KeyError):
                officeresults["Metadata"] = self._get_xml_meta(filepath)

        try:
            if vba and vba.detect_vba_macros():
                officeresults["Metadata"]["HasMacros"] = "Yes"
                # Create IOC and category vars. We do this before processing the
                # macro(s) to avoid overwriting data when there are multiple
                # macros in a single file.
                officeresults["Macro"] = {"Code": {}, "info": {}, "Analysis": {}}
                ctr = 0

                try:
                    for _, _, vba_filename, vba_code in vba.extract_macros():
                        vba_code = filter_vba(vba_code)
                        if vba_code.strip() != "":
                            # Handle all macros
                            ctr += 1
                            outputname = f"Macro{ctr}"
                            officeresults["Macro"]["Code"][outputname] = [
                                (convert_to_printable(vba_filename),
                                 convert_to_printable(vba_code))
                            ]
                            # exist_ok avoids the check-then-create race of a
                            # separate os.path.exists() test.
                            os.makedirs(macro_folder, exist_ok=True)
                            macro_file = os.path.join(macro_folder, outputname)
                            with open(macro_file, "w") as f:
                                f.write(convert_to_printable(vba_code))
                            officeresults["Macro"]["info"][outputname] = {
                                "yara_macro":
                                File(macro_file).get_yara(category="macro")
                            }
                            officeresults["Macro"]["info"][outputname][
                                "yara_macro"].extend(
                                    File(macro_file).get_yara(category="CAPE"))

                        try:
                            iocs = vbadeobf.parse_macro(vba_code)
                            for pattern, match in iocs:
                                officeresults["Macro"]["Analysis"].setdefault(
                                    "IOCs", []).append((pattern, match))
                        except ValueError as e:
                            log.error("Can't parse macros for %s - %s ",
                                      filepath, str(e))
                        except Exception as e:
                            log.error(e, exc_info=True)
                        for keyword, description in detect_autoexec(vba_code):
                            officeresults["Macro"]["Analysis"].setdefault(
                                "AutoExec", []).append(
                                    (keyword.replace(".", "_"), description))
                        for keyword, description in detect_suspicious(
                                vba_code):
                            officeresults["Macro"]["Analysis"].setdefault(
                                "Suspicious", []).append(
                                    (keyword.replace(".", "_"), description))
                        for encoded, decoded in detect_hex_strings(vba_code):
                            officeresults["Macro"]["Analysis"].setdefault(
                                "HexStrings", []).append(
                                    (encoded, convert_to_printable(decoded)))
                except (AssertionError, UnexpectedDataError) as e:
                    # The message needs a placeholder for the exception;
                    # passing an argument without one makes logging report a
                    # formatting error instead of the warning.
                    log.warning("Macros in static.py: %s", e)

                if HAVE_VBA2GRAPH:
                    vba2graph_func(filepath, self.task_id, self.sha256)

            else:
                officeresults["Metadata"]["HasMacros"] = "No"
        finally:
            # The parser is not used past this point; release it whether or
            # not macro extraction succeeded.
            if vba:
                vba.close()

        try:
            for indicator in OleID(filepath).check():
                if indicator.value and indicator.name in {
                        "Word Document", "Excel Workbook",
                        "PowerPoint Presentation"
                }:
                    officeresults["Metadata"]["DocumentType"] = indicator.name
        except Exception as e:
            log.error(e, exc_info=True)

        if HAVE_XLM_DEOBF:
            tmp_xlmmacro = xlmdeobfuscate(filepath, self.task_id,
                                          self.options.get("password", ""))
            if tmp_xlmmacro:
                officeresults["XLMMacroDeobfuscator"] = tmp_xlmmacro

        return officeresults
Exemple #7
0
def process_file(container, filename, data, altparser=False):
    """
    Process a single file

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    print '=' * 79
    print 'FILE:', display_filename
    vm = ViperMonkey()
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        print 'Type:', vba.type
        if vba.detect_vba_macros():
            #print 'Contains VBA Macros:'
            for (subfilename, stream_path, vba_filename,
                 vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print '-' * 79
                print 'VBA MACRO %s ' % vba_filename
                print 'in file: %s - OLE stream: %s' % (subfilename,
                                                        repr(stream_path))
                print '- ' * 39
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print '(empty macro)'
                else:
                    # TODO: option to display code
                    vba_code = vba_collapse_long_lines(vba_code)
                    print '-' * 79
                    print 'VBA CODE (with long lines collapsed):'
                    print vba_code
                    print '-' * 79
                    print 'PARSING VBA CODE:'
                    try:
                        if altparser:
                            vm.add_module2(vba_code)
                        else:
                            vm.add_module(vba_code)
                    except ParseException as err:
                        print err.line
                        print " " * (err.column - 1) + "^"
                        print err

            print '-' * 79
            print 'TRACING VBA CODE (entrypoint = Auto*):'
            vm.trace()
            # print table of all recorded actions
            print('Recorded Actions:')
            print(vm.dump_actions())

        else:
            print 'No VBA macros found.'
    except:  #TypeError:
        #raise
        #TODO: print more info if debug mode
        #print sys.exc_value
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    print ''