def process_file_scanexpr(container, filename, data):
    """
    Process a single file: print each VBA macro it contains, then print a
    table of the expressions yielded by scan_expressions() with their
    evaluated values.

    Exceptions raised while processing are printed with a full stack trace
    and do not propagate. KeyboardInterrupt and SystemExit are not
    intercepted.

    :param container: str, path and filename of container if the file is within
        a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is
        a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    # Single-argument print(...) calls behave the same as a statement
    # (Python 2) and as a function (Python 3).
    print('=' * 79)
    print('FILE: %s' % (display_filename,))
    code_chunks = []
    vba = None
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        print('Type: %s' % (vba.type,))
        if vba.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print('-' * 79)
                print('VBA MACRO %s ' % vba_filename)
                print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                print('- ' * 39)
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print('(empty macro)')
                else:
                    # TODO: option to display code
                    print(vba_code_filtered)
                    # The filtered code is displayed, but the unfiltered code
                    # (with long lines collapsed) is what gets scanned.
                    code_chunks.append(vba_collapse_long_lines(vba_code))
            # Each macro is preceded by a newline, as when concatenating
            # '\n' + code one macro at a time.
            all_code = ''.join('\n' + chunk for chunk in code_chunks)
            print('-' * 79)
            print('EVALUATED VBA EXPRESSIONS:')
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            print(t)
        else:
            print('No VBA macros found.')
    except Exception:
        #TODO: print more info if debug mode
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    finally:
        # Release the files held by the parser, even on error.
        if vba is not None:
            vba.close()
    print('')
def parse_stream(subfilename, stream_path=None, vba_filename=None, vba_code=None, strip_useless=False):
    """
    Parse the VBA code of one macro stream and return the parsed module.

    :param subfilename: name of the file holding the macro, or a
        (subfilename, stream_path, vba_filename, vba_code) tuple when
        stream_path is None.
    :param stream_path: OLE stream path, printed in the macro banner.
    :param vba_filename: macro name, printed in the macro banner.
    :param vba_code: VBA source code to parse.
    :param strip_useless: if true, the code is also passed through
        strip_useless_code() before parsing.
    :return: the parsed module with its ``code`` attribute set to the
        processed source, the string "empty" for an empty macro, or None when
        a ParseException is raised.
    """
    # A missing stream_path means the caller packed all four values into the
    # first argument.
    if stream_path is None:
        (subfilename, stream_path, vba_filename, vba_code) = subfilename

    # Collapse long lines first, then filter cruft from the VBA.
    vba_code = filter_vba(vba_collapse_long_lines(vba_code))
    if strip_useless:
        vba_code = strip_useless_code(vba_code)

    rule = '-' * 79
    print(rule)
    print('VBA MACRO %s ' % vba_filename)
    print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
    print('- ' * 39)

    # Nothing to parse.
    if vba_code.strip() == '':
        print('(empty macro)')
        return "empty"

    print(rule)
    print('VBA CODE (with long lines collapsed):')
    print(vba_code)
    print(rule)
    print('PARSING VBA CODE:')
    try:
        # Enable PackRat for better performance:
        # (see https://pythonhosted.org/pyparsing/pyparsing.ParserElement-class.html#enablePackrat)
        ParserElement.enablePackrat()
        parsed = module.parseString(vba_code + "\n", parseAll=True)[0]
        parsed.code = vba_code
    except ParseException as err:
        # Show the offending line with a caret under the error column.
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
        print("Parse Error. Processing Aborted.")
        return None

    return parsed
def extract_macro(self):
    """
    Extract the VBA macros of ``self.sample`` with olevba.

    When macros are detected, ``self.results["analysis"]`` receives the
    output of ``analyze_macros()`` and ``self.results["code"]`` receives the
    concatenation of every macro's source after ``olevba.filter_vba``.

    :return: ``self.results`` when macros were found, False otherwise.
    """
    vba = olevba.VBA_Parser(self.sample)
    try:
        if not vba.detect_vba_macros():
            return False
        macro_code = "".join(
            olevba.filter_vba(vba_code)
            for (_filename, _stream_path, _vba_filename, vba_code) in vba.extract_macros()
        )
        self.results["analysis"] = vba.analyze_macros()
        self.results["code"] = macro_code
        return self.results
    finally:
        # Always release the parser, including when extraction or analysis raises.
        vba.close()
def parse_stream(subfilename,
                 stream_path=None,
                 vba_filename=None,
                 vba_code=None,
                 strip_useless=False,
                 local_funcs=None):
    """Parse the macros from a single OLE stream.

    @param subfilename (str) The name of the file containing the
    macros, or a (subfilename, stream_path, vba_filename, vba_code)
    tuple when stream_path is None.

    @param stream_path The OLE stream path; its repr() is printed in
    the macro banner and compared against 'xlm_macro'.

    @param vba_filename The macro name printed in the macro banner.

    @param vba_code (str) The macro code to parse.

    @param strip_useless (boolean) If true, run the code through
    core.strip_lines.strip_useless_code() before parsing.

    @param local_funcs (list) A list of the names of already declared
    local VBA functions.

    @return (Module object) A parsed module object, the string "empty"
    for skipped or empty streams, or None on a parse error.

    """

    # Default the local function list.
    if local_funcs is None:
        local_funcs = []

    # Check for timeouts.
    core.vba_object.limits_exceeded(throw_error=True)

    # All four values may arrive packed in the first argument.
    if stream_path is None:
        (subfilename, stream_path, vba_filename, vba_code) = subfilename

    # Skip old-style XLM macros.
    if repr(stream_path).strip() == "'xlm_macro'":
        log.warning("Skipping XLM macro stream...")
        return "empty"

    # Collapse long lines, filter cruft from the VBA, then pull the
    # Visual Basic out of .hta contents (if we are looking at a .hta file).
    collapsed = core.vba_collapse_long_lines(vba_code)
    filtered = filter_vba(collapsed)
    vba_code = get_vb_contents_from_hta(filtered)

    # Do not analyze the file if the VBA looks like garbage characters.
    if read_ole_fields.is_garbage_vba(vba_code, no_html=True):
        log.warning("Failed to extract VBScript from HTA. Skipping.")
        return "empty"

    # Skip some XML that olevba gives for some 2007+ streams.
    if vba_code.strip().startswith("<?xml"):
        log.warning("Skipping XML stream.")
        return "empty"

    # Strip out code that does not affect the end result of the program.
    if strip_useless:
        vba_code = core.strip_lines.strip_useless_code(vba_code, local_funcs)

    rule = '-'*79
    safe_print(rule)
    safe_print('VBA MACRO %s ' % vba_filename)
    safe_print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
    safe_print('- '*39)

    # Parse the macro.
    if vba_code.strip() == '':
        safe_print('(empty macro)')
        parsed = "empty"
    else:
        safe_print(rule)
        safe_print('VBA CODE (with long lines collapsed):')
        safe_print(vba_code)
        safe_print(rule)
        safe_print('PARSING VBA CODE:')
        try:
            parsed = core.module.parseString(vba_code + "\n", parseAll=True)[0]
            pyparsing.ParserElement.resetCache()
            parsed.code = vba_code
        except pyparsing.ParseException as err:
            # Show the offending line with a caret under the error column.
            safe_print(err.line)
            safe_print(" "*(err.column-1) + "^")
            safe_print(err)
            log.error("Parse Error. Processing Aborted.")
            return None

    # Check for timeouts.
    core.vba_object.limits_exceeded(throw_error=True)

    # Return the parsed macro.
    return parsed
def process_file_scanexpr(container, filename, data):
    """Process a single file.

    Prints each VBA macro found in the file, then a table of the
    expressions yielded by core.scan_expressions() with their evaluated
    values. Exceptions are logged (with a stack trace when the log
    level is DEBUG) and do not propagate.

    @param container (str) Path and filename of container if the file is within
    a zip archive, None otherwise.

    @param filename (str) path and filename of file on disk, or
    within the container.

    @param data (bytes) Content of the file if it is in a container,
    None if it is a file on disk.

    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    safe_print('='*79)
    safe_print('FILE: ' + safe_str_convert(display_filename))
    code_chunks = []
    vba = None
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        import oletools
        oletools.olevba.enable_logging()
        if (log.getEffectiveLevel() == logging.DEBUG):
            log.debug('opening %r' % filename)
        vba = VBA_Parser(filename, data, relaxed=True)
        if vba.detect_vba_macros():

            # Read in document metadata.
            vm = core.ViperMonkey(filename, data)
            ole = olefile.OleFileIO(filename)
            try:
                vm.set_metadata(ole.get_metadata())
            except Exception as e:
                log.warning("Reading in metadata failed. Trying fallback. " + safe_str_convert(e))
                vm.set_metadata(get_metadata_exif(filename))
            finally:
                # Do not leave the OLE file handle open.
                ole.close()

            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code = filter_vba(vba_code)
                safe_print('-'*79)
                safe_print('VBA MACRO %s ' % vba_filename)
                safe_print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                safe_print('- '*39)
                # detect empty macros:
                if vba_code.strip() == '':
                    safe_print('(empty macro)')
                else:
                    # TODO: option to display code
                    safe_print(vba_code)
                    code_chunks.append(core.vba_collapse_long_lines(vba_code))
            # Each macro is preceded by a newline, as when concatenating
            # '\n' + code one macro at a time.
            all_code = ''.join('\n' + chunk for chunk in code_chunks)
            safe_print('-'*79)
            safe_print('EVALUATED VBA EXPRESSIONS:')
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in core.scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            safe_print(t)

        else:
            safe_print('No VBA macros found.')
    except Exception as e:
        log.error("Caught exception. " + safe_str_convert(e))
        if (log.getEffectiveLevel() == logging.DEBUG):
            traceback.print_exc()
    finally:
        # Release the files held by the parser, even on error.
        if vba is not None:
            vba.close()
    safe_print('')
def _parse(self, filepath: str) -> Dict[str, Any]:
    """Parses an office document for static information.

    @param filepath: Path to the file to be analyzed.
    @return: dict with a "Metadata" entry plus, when present, "Macro" and
        "XLMMacroDeobfuscator" entries. An empty dict is returned when
        oletools is unavailable or VBA_Parser fails with an unexpected error.
    """
    results = {}
    if not HAVE_OLETOOLS:
        return results

    vba = False
    if is_rtf(filepath):
        try:
            with open(filepath, "rb") as f:
                contents = f.read()
            temp_results = self._parse_rtf(contents)
            if temp_results:
                results["office_rtf"] = temp_results
        except Exception as e:
            log.error(e, exc_info=True)
    else:
        try:
            vba = VBA_Parser(filepath)
        except ValueError as e:
            log.error("Error VBA_Parser: %s", str(e))
        except Exception:
            return results

    try:
        try:
            # extract DDE
            dde = extract_dde(filepath)
            if dde:
                results["office_dde"] = convert_to_printable(dde)
        except (csv_error, UnicodeDecodeError):
            pass
        except AttributeError:
            log.warning("OleFile library bug: AttributeError! fix: pip3 install -U olefile")
        except Exception as e:
            log.error(e, exc_info=True)

        # NOTE(review): `results` ("office_rtf" / "office_dde") is filled in
        # above, but `officeresults` is what this method returns. Confirm
        # whether the two dicts are meant to be merged.
        officeresults = {"Metadata": {}}
        macro_folder = os.path.join(CUCKOO_ROOT, "storage", "analyses", self.task_id, "macros")
        if olefile.isOleFile(filepath):
            with olefile.OleFileIO(filepath) as ole:
                meta = ole.get_metadata()
                # must be left this way or we won't see the results
                officeresults["Metadata"] = self._get_meta(meta)
        else:
            with contextlib.suppress(KeyError):
                officeresults["Metadata"] = self._get_xml_meta(filepath)

        if vba and vba.detect_vba_macros():
            officeresults["Metadata"]["HasMacros"] = "Yes"
            officeresults["Macro"] = self._extract_macro_results(vba, filepath, macro_folder)
            if HAVE_VBA2GRAPH:
                vba2graph_func(filepath, self.task_id, self.sha256)
        else:
            officeresults["Metadata"]["HasMacros"] = "No"
    finally:
        # The parser is not used past this point; release its open files.
        if vba:
            vba.close()

    try:
        for indicator in OleID(filepath).check():
            if indicator.value and indicator.name in {"Word Document", "Excel Workbook", "PowerPoint Presentation"}:
                officeresults["Metadata"]["DocumentType"] = indicator.name
    except Exception as e:
        log.error(e, exc_info=True)

    if HAVE_XLM_DEOBF:
        tmp_xlmmacro = xlmdeobfuscate(filepath, self.task_id, self.options.get("password", ""))
        if tmp_xlmmacro:
            officeresults["XLMMacroDeobfuscator"] = tmp_xlmmacro

    return officeresults


def _extract_macro_results(self, vba, filepath: str, macro_folder: str) -> Dict[str, Any]:
    """Builds the "Macro" result entry from every non-empty macro of an opened parser.

    @param vba: parser object whose extract_macros() yields the macros.
    @param filepath: Path of the analyzed file, used in log messages only.
    @param macro_folder: Folder each macro's printable source is written to.
    @return: dict with "Code", "info" and "Analysis" entries.
    """
    # The containers are created before the loop so that several macros in
    # one file accumulate instead of overwriting each other.
    macro_results = {"Code": {}, "info": {}, "Analysis": {}}
    analysis = macro_results["Analysis"]
    ctr = 0
    try:
        for _, _, vba_filename, vba_code in vba.extract_macros():
            vba_code = filter_vba(vba_code)
            if vba_code.strip() == "":
                continue

            # Handle all macros
            ctr += 1
            outputname = f"Macro{ctr}"
            macro_results["Code"][outputname] = [
                (convert_to_printable(vba_filename), convert_to_printable(vba_code))
            ]

            # exist_ok avoids the check-then-create race on the folder.
            os.makedirs(macro_folder, exist_ok=True)
            macro_file = os.path.join(macro_folder, outputname)
            with open(macro_file, "w") as f:
                f.write(convert_to_printable(vba_code))

            macro_results["info"][outputname] = {"yara_macro": File(macro_file).get_yara(category="macro")}
            macro_results["info"][outputname]["yara_macro"].extend(File(macro_file).get_yara(category="CAPE"))

            try:
                iocs = vbadeobf.parse_macro(vba_code)
                for pattern, match in iocs:
                    analysis.setdefault("IOCs", []).append((pattern, match))
            except ValueError as e:
                log.error("Can't parse macros for %s - %s ", filepath, str(e))
            except Exception as e:
                log.error(e, exc_info=True)

            for keyword, description in detect_autoexec(vba_code):
                analysis.setdefault("AutoExec", []).append((keyword.replace(".", "_"), description))
            for keyword, description in detect_suspicious(vba_code):
                analysis.setdefault("Suspicious", []).append((keyword.replace(".", "_"), description))
            for encoded, decoded in detect_hex_strings(vba_code):
                analysis.setdefault("HexStrings", []).append((encoded, convert_to_printable(decoded)))
    except (AssertionError, UnexpectedDataError) as e:
        # The message needs a placeholder: passing an argument without one
        # makes logging fail to format the record and drop the exception text.
        log.warning("Macros in static.py: %s", e)
    return macro_results
def process_file(container, filename, data, altparser=False):
    """
    Process a single file: print each VBA macro it contains, add every
    non-empty macro to a ViperMonkey instance, trace the code and print the
    recorded actions.

    Exceptions raised while processing are printed with a full stack trace
    and do not propagate. KeyboardInterrupt and SystemExit are not
    intercepted.

    :param container: str, path and filename of container if the file is within
        a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is
        a file on disk.
    :param altparser: bool, if True each module is added with vm.add_module2()
        instead of vm.add_module().
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    # Single-argument print(...) calls behave the same as a statement
    # (Python 2) and as a function (Python 3).
    print('=' * 79)
    print('FILE: %s' % (display_filename,))
    vm = ViperMonkey()
    vba = None
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        print('Type: %s' % (vba.type,))
        if vba.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print('-' * 79)
                print('VBA MACRO %s ' % vba_filename)
                print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                print('- ' * 39)
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print('(empty macro)')
                else:
                    # TODO: option to display code
                    # The emptiness check uses the filtered code, but the
                    # unfiltered code (long lines collapsed) is what is parsed.
                    vba_code = vba_collapse_long_lines(vba_code)
                    print('-' * 79)
                    print('VBA CODE (with long lines collapsed):')
                    print(vba_code)
                    print('-' * 79)
                    print('PARSING VBA CODE:')
                    try:
                        if altparser:
                            vm.add_module2(vba_code)
                        else:
                            vm.add_module(vba_code)
                    except ParseException as err:
                        # Show the offending line with a caret under the
                        # error column, then carry on with the next macro.
                        print(err.line)
                        print(" " * (err.column - 1) + "^")
                        print(err)

            print('-' * 79)
            print('TRACING VBA CODE (entrypoint = Auto*):')
            vm.trace()
            # print table of all recorded actions
            print('Recorded Actions:')
            print(vm.dump_actions())
        else:
            print('No VBA macros found.')
    except Exception:
        #TODO: print more info if debug mode
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    finally:
        # Release the files held by the parser, even on error.
        if vba is not None:
            vba.close()
    print('')