def __init__(self, filename, data):
    """Set up the emulator state and decide whether the input is VBA or VBScript.

    @param filename (str) The name of the file being analyzed. If this
    is None or blank the file type is determined from data instead.

    @param data (str) The contents to analyze. Only consulted for file
    type detection when no usable filename was given.

    """

    # What we were asked to analyze.
    self.filename = filename
    self.data = data

    # Nothing has been parsed or loaded yet.
    self.comments = None
    self.metadata = None
    self.vba = None
    self.modules = []
    self.modules_code = []
    self.globals = {}
    self.externals = {}

    # list of actions (stored as tuples by report_action)
    self.actions = []

    # The file type check works on the file name when we have one and
    # falls back to the raw contents otherwise.
    if (filename is None) or (len(filename.strip()) == 0):
        vba_pointer, is_data = data, True
    else:
        vba_pointer, is_data = filename, False

    # Anything that is not an Office file is treated as VBScript.
    looks_like_office = filetype.is_office_file(vba_pointer, is_data)
    self.is_vbscript = not looks_like_office
    if looks_like_office:
        log.info("Emulating an Office (VBA) file.")
    else:
        log.info("Emulating a VBScript file.")

    # Olevba uses '\n' as EOL, regular VBScript uses '\r\n'. Note that
    # this updates the module-level VBA_LIBRARY table in vba_library.
    if self.is_vbscript:
        vba_library.VBA_LIBRARY['vbCrLf'] = '\r\n'

    # The loaded Excel spreadsheet (xlrd), if any.
    self.loaded_excel = None

    # Data saved in document variables.
    self.doc_vars = {}

    # Document text.
    self.doc_text = ""

    # Document tables.
    self.doc_tables = []

    # Names of the entry point functions to emulate.
    self.entry_points = [
        'autoopen',
        'document_open',
        'autoclose',
        'document_close',
        'auto_open',
        'autoexec',
        'autoexit',
        'document_beforeclose',
        'workbook_open',
        'workbook_activate',
        'auto_close',
        'workbook_close',
        'workbook_deactivate',
        'documentopen',
        'app_documentopen',
        'main',
    ]

    # Name suffixes of callback functions that provide alternate ways
    # of running things on document (approximately) open.
    # See https://www.greyhathacker.net/?m=201609
    self.callback_suffixes = [
        '_Activate',
        '_BeforeNavigate2',
        '_BeforeScriptExecute',
        '_Change',
        '_DocumentComplete',
        '_DownloadBegin',
        '_DownloadComplete',
        '_FileDownload',
        '_GotFocus',
        '_Layout',
        '_LostFocus',
        '_MouseEnter',
        '_MouseHover',
        '_MouseLeave',
        '_MouseMove',
        '_NavigateComplete2',
        '_NavigateError',
        '_Painted',
        '_Painting',
        '_ProgressChange',
        '_PropertyChange',
        '_Resize',
        '_SetSecureLockIcon',
        '_StatusTextChange',
        '_TitleChange',
        '_Initialize',
        '_Click',
        '_OnConnecting',
        '_BeforeClose',
        '_OnDisconnected',
        '_OnEnterFullScreenMode',
        '_Zoom',
        '_Scroll',
    ]
def __init__(self, filename, data, do_jit=False):
    """Create a new VBA/VBScript emulator object.

    @param filename (str) The name of the file being analyzed. If this
    is None or blank the file type is determined from data instead.

    @param data (str) The VBA/VBScript code to emulate.

    @param do_jit (boolean) If True use JIT transpilation of VB code to
    Python to speed up loop analysis, if False just emulate loops using
    the regular emulation engine.

    """
    super(ViperMonkey, self).__init__()

    # What we were asked to analyze, and how.
    self.filename = filename
    self.data = data
    self.do_jit = do_jit

    # Nothing has been parsed or loaded yet.
    self.comments = None
    self.metadata = None
    self.vba = None
    self.modules = []
    self.modules_code = []
    self.decoded_strs = set()
    self.globals = {}
    self.externals = {}

    # list of actions (stored as tuples by report_action)
    self.actions = []

    # The file type check works on the file name when we have one and
    # falls back to the raw contents otherwise.
    if (filename is None) or (len(filename.strip()) == 0):
        vba_pointer, is_data = data, True
    else:
        vba_pointer, is_data = filename, False

    # Anything that is not an Office file is treated as VBScript.
    looks_like_office = filetype.is_office_file(vba_pointer, is_data)
    self.is_vbscript = not looks_like_office
    if looks_like_office:
        log.info("Emulating an Office (VBA) file.")
    else:
        log.info("Emulating a VBScript file.")

    # Olevba uses '\n' as EOL, regular VBScript uses '\r\n'. Note that
    # this updates the module-level VBA_LIBRARY table in vba_library.
    if self.is_vbscript:
        vba_library.VBA_LIBRARY['vbCrLf'] = '\r\n'

    # The loaded Excel spreadsheet (xlrd), if any.
    self.loaded_excel = None

    # Data saved in document variables.
    self.doc_vars = {}

    # Document text.
    self.doc_text = ""

    # Document tables.
    self.doc_tables = []

    # Names of the entry point functions to emulate.
    self.entry_points = [
        'autoopen',
        'document_open',
        'autoclose',
        'document_close',
        'auto_open',
        'autoexec',
        'autoexit',
        'document_beforeclose',
        'workbook_open',
        'workbook_activate',
        'auto_close',
        'workbook_close',
        'workbook_deactivate',
        'documentopen',
        'app_documentopen',
        'main',
    ]

    # User-specified entry points. If non-empty only these entry points
    # will be used.
    self.user_entry_points = []

    # Name suffixes of callback functions that provide alternate ways
    # of running things on document (approximately) open.
    # See https://www.greyhathacker.net/?m=201609
    self.callback_suffixes = [
        '_Activate',
        '_BeforeNavigate2',
        '_BeforeScriptExecute',
        '_Calculate',
        '_Change',
        '_DocumentComplete',
        '_DownloadBegin',
        '_DownloadComplete',
        '_FileDownload',
        '_GotFocus',
        '_Layout',
        '_LostFocus',
        '_MouseEnter',
        '_MouseHover',
        '_MouseLeave',
        '_MouseMove',
        '_NavigateComplete2',
        '_NavigateError',
        '_Painted',
        '_Painting',
        '_ProgressChange',
        '_PropertyChange',
        '_Resize',
        '_SetSecureLockIcon',
        '_StatusTextChange',
        '_TitleChange',
        '_Initialize',
        '_Click',
        '_OnConnecting',
        '_BeforeClose',
        '_OnDisconnected',
        '_OnEnterFullScreenMode',
        '_Zoom',
        '_Scroll',
        '_BeforeDropOrPaste',
    ]
def load_excel_libreoffice(data):
    """Read in an Excel file into an ExcelBook object by using LibreOffice.

    The workbook contents are written to a temporary file, the
    export_all_excel_sheets.py helper script is run on that file to dump
    the sheets as CSV files, and each CSV file is read back in as an
    ExcelSheet. The temporary workbook and the CSV files named in the
    script output are deleted before returning, whether or not loading
    succeeded.

    @param data (str) The Excel file contents.

    @return (core.excel.ExcelBook object) On success return the Excel
    spreadsheet as an ExcelBook object. Returns None on error.

    """

    # Don't try this if it is not an Office file.
    if (not filetype.is_office_file(data, True)):
        log.warning(
            "The file is not an Office file. Not extracting sheets with LibreOffice."
        )
        return None

    # Temporary file for the Excel data, and the CSV files we know about
    # (filled in once the export script has reported them).
    out_dir = "/tmp/tmp_excel_file_" + str(random.randrange(0, 10000000000))
    csv_files = []
    try:

        # Save the Excel data to the temporary file.
        with open(out_dir, 'wb') as f:
            f.write(data)

        # Dump all the sheets as CSV files using soffice.
        try:
            output = subprocess.check_output([
                "timeout", "30", "python3",
                _thismodule_dir + "/../export_all_excel_sheets.py", out_dir
            ])
        except Exception as e:
            log.error("Running export_all_excel_sheets.py failed. " + str(e))
            return None

        # Get the names of the sheet files, if there are any. Also get the
        # name of the currently active sheet. Bytes literals are used for
        # the quote swap because check_output() returns bytes on Python 3
        # (on Python 2 bytes and str are the same type).
        try:
            sheet_files = json.loads(output.replace(b"'", b'"'))
        except Exception as e:
            if (log.getEffectiveLevel() == logging.DEBUG):
                log.debug("Loading sheet file names failed. " + str(e))
            return None

        # No sheets exported? The 1st element is the name of the active
        # sheet, hence the <= 1.
        if (len(sheet_files) <= 1):
            return None
        csv_files = list(sheet_files[1:])

        # Save the name of the active sheet.
        active_sheet_name = _fix_sheet_name(sheet_files[0])

        # Load the CSV files into Excel objects.
        sheet_map = {}
        for sheet_file in csv_files:

            # Read the CSV file into a single Excel workbook object.
            tmp_workbook = _read_sheet_from_csv(sheet_file)

            # Pull the cell data for the current sheet.
            cell_data = tmp_workbook.sheet_by_name("Sheet1").cells

            # The sheet name is whatever sits between the first "--" and
            # the last "." in the CSV file name.
            start = sheet_file.index("--") + 2
            end = sheet_file.rindex(".")
            sheet_name = _fix_sheet_name(sheet_file[start:end])

            # The sheet index is the number between the first "-" and the
            # next "-" in the CSV file name.
            start = sheet_file.index("-") + 1
            end = sheet_file[start:].index("-") + start
            sheet_index = int(sheet_file[start:end])

            # Map the sheet (current name and data) to its index.
            sheet_map[sheet_index] = ExcelSheet(cell_data, sheet_name)

        # Save the sheets in the proper order into a workbook.
        result_book = ExcelBook(None)
        for index in sorted(sheet_map):
            result_book.sheets.append(sheet_map[index])

        # Set the name of the active sheet.
        if (active_sheet_name != "NO_ACTIVE_SHEET"):
            result_book.active_sheet_name = active_sheet_name

        # Return the workbook.
        return result_book

    finally:

        # Delete the CSV sheet files and the temporary Excel file. This is
        # best effort so that a cleanup problem never hides the real
        # result (or the real exception).
        for tmp_path in csv_files + [out_dir]:
            try:
                if os.path.isfile(tmp_path):
                    os.remove(tmp_path)
            except OSError as e:
                log.warning("Deleting temporary file " + str(tmp_path) +
                            " failed. " + str(e))
def load_excel_libreoffice(data):
    """Load the sheets from a given in-memory Excel file into a Workbook
    object.

    The workbook contents (after _unhide_sheets() has been applied) are
    written to a temporary file, the export_all_excel_sheets.py script
    located next to this module is run on that file to dump the sheets as
    CSV files, and each CSV file is read back in as an ExcelSheet. The
    temporary workbook and the CSV files named in the script output are
    deleted before returning, whether or not loading succeeded.

    @param data (binary blob) The contents of an Excel file.

    @return (ExcelBook object) On success return a workbook object with
    the read in Excel workbook, on failure return None.

    """

    # Don't try this if it is not an Office file.
    if (not filetype.is_office_file(data, True)):
        print(
            "WARNING: The file is not an Office file. Not extracting sheets with LibreOffice."
        )
        return None

    # Unhide hidden Excel sheets.
    data = _unhide_sheets(data)

    # Temporary file for the Excel data, and the CSV files we know about
    # (filled in once the export script has reported them).
    out_dir = "/tmp/tmp_excel_file_" + str(random.randrange(0, 10000000000))
    csv_files = []
    try:

        # Save the Excel data to the temporary file.
        with open(out_dir, 'wb') as f:
            f.write(data)

        # Dump all the sheets as CSV files using soffice.
        _thismodule_dir = os.path.normpath(
            os.path.abspath(os.path.dirname(__file__)))
        try:
            output = subprocess.check_output([
                "python3", _thismodule_dir + "/export_all_excel_sheets.py",
                out_dir
            ])
        except Exception as e:
            print("ERROR: Running export_all_excel_sheets.py failed. " + str(e))
            return None

        # Get the names of the sheet files, if there are any.
        try:
            sheet_files = json.loads(output.replace(b"'", b'"'))
        except Exception as e:
            print(e)
            return None
        if (len(sheet_files) == 0):
            return None
        csv_files = list(sheet_files)

        # Load the CSV files into Excel objects.
        sheet_map = {}
        for sheet_file in csv_files:

            # Read the CSV file into a single Excel workbook object.
            tmp_workbook = read_sheet_from_csv(sheet_file)

            # Pull the cell data for the current sheet.
            cell_data = tmp_workbook.sheet_by_name("Sheet1").cells

            # The sheet name is whatever sits between the first "--" and
            # the last "." in the CSV file name.
            start = sheet_file.index("--") + 2
            end = sheet_file.rindex(".")
            sheet_name = sheet_file[start:end]

            # The sheet index is the number between the first "-" and the
            # next "-" in the CSV file name.
            start = sheet_file.index("-") + 1
            end = sheet_file[start:].index("-") + start
            sheet_index = int(sheet_file[start:end])

            # Map the sheet (current name and data) to its index.
            sheet_map[sheet_index] = ExcelSheet(cell_data, sheet_name)

        # Save the sheets in the proper order into a workbook. Walk the
        # indices that were actually seen rather than assuming they are
        # exactly 0..n-1, so a gap or a 1-based index is not a KeyError.
        result_book = ExcelBook(None)
        for index in sorted(sheet_map):
            result_book.sheets.append(sheet_map[index])

        # Return the workbook.
        return result_book

    finally:

        # Delete the CSV sheet files and the temporary Excel file. This is
        # best effort so that a cleanup problem never hides the real
        # result (or the real exception).
        for tmp_path in csv_files + [out_dir]:
            try:
                if os.path.isfile(tmp_path):
                    os.remove(tmp_path)
            except OSError as e:
                print("WARNING: Deleting temporary file " + str(tmp_path) +
                      " failed. " + str(e))