def Parse(self, parser_context, file_entry):
  """Extract data from a Windows EventLog (EVT) file.

  Args:
    parser_context: A parser context object (instance of ParserContext).
    file_entry: A file entry object (instance of dfvfs.FileEntry).

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  file_object = file_entry.GetFileObject()
  evt_file = pyevt.file()
  evt_file.set_ascii_codepage(parser_context.codepage)

  try:
    evt_file.open_file_object(file_object)
  except IOError as exception:
    evt_file.close()
    file_object.close()
    raise errors.UnableToParseFile(
        u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format(
            self.NAME, file_entry.name, exception))

  for record_index in range(0, evt_file.number_of_records):
    try:
      evt_record = evt_file.get_record(record_index)
      self._ParseRecord(parser_context, evt_record, file_entry=file_entry)
    except IOError as exception:
      logging.warning((
          u'[{0:s}] unable to parse event record: {1:d} in file: {2:s} '
          u'with error: {3:s}').format(
              self.NAME, record_index, file_entry.name, exception))

  for record_index in range(0, evt_file.number_of_recovered_records):
    try:
      evt_record = evt_file.get_recovered_record(record_index)
      self._ParseRecord(
          parser_context, evt_record, file_entry=file_entry, recovered=True)
    except IOError as exception:
      logging.info((
          u'[{0:s}] unable to parse recovered event record: {1:d} in file: '
          u'{2:s} with error: {3:s}').format(
              self.NAME, record_index, file_entry.name, exception))

  evt_file.close()
  file_object.close()
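# A minimal standalone sketch of the pyevt iteration pattern used above. The
# path is hypothetical, and pyevt.file.open() is assumed to follow the usual
# libyal binding signature; only number_of_records and get_record are taken
# directly from the parser itself.
import pyevt

def count_readable_evt_records(path):
  """Counts the event records that can be read from an EVT file."""
  evt_file = pyevt.file()
  evt_file.open(path)

  readable = 0
  for record_index in range(evt_file.number_of_records):
    try:
      evt_file.get_record(record_index)
      readable += 1
    except IOError:
      # Damaged records are skipped, mirroring the parser above.
      pass

  evt_file.close()
  return readable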
def ParseFileObject(self, parser_mediator, file_object): """Parses a Windows Recycler INFO2 file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ # Since this header value is really generic it is hard not to use filename # as an indicator too. # TODO: Rethink this and potentially make a better test. filename = parser_mediator.GetFilename() if not filename.startswith('INFO2'): return file_header_map = self._GetDataTypeMap('recycler_info2_file_header') try: file_header, _ = self._ReadStructureFromFileObject( file_object, 0, file_header_map) except (ValueError, errors.ParseError) as exception: raise errors.UnableToParseFile(( 'Unable to parse Windows Recycler INFO2 file header with ' 'error: {0!s}').format(exception)) if file_header.unknown1 != 5: parser_mediator.ProduceExtractionWarning('unsupported format signature.') return file_entry_size = file_header.file_entry_size if file_entry_size not in (280, 800): parser_mediator.ProduceExtractionWarning( 'unsupported file entry size: {0:d}'.format(file_entry_size)) return file_offset = file_object.get_offset() file_size = file_object.get_size() while file_offset < file_size: self._ParseInfo2Record( parser_mediator, file_object, file_offset, file_entry_size) file_offset += file_entry_size
def ParseFileObject(self, parser_mediator, file_object): """Parses a Portable Executable (PE) file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. file_object (dfvfs.FileIO): a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ pe_data_slice = dfvfs_data_slice.DataSlice(file_object) try: pefile_object = pefile.PE(data=pe_data_slice, fast_load=True) pefile_object.parse_data_directories( directories=self._PE_DIRECTORIES) except Exception as exception: raise errors.UnableToParseFile( 'Unable to read PE file with error: {0!s}'.format(exception)) event_data = PEEventData() # Note that the result of get_imphash() is an empty string if there is no # import hash. event_data.imphash = pefile_object.get_imphash() or None event_data.pe_type = self._GetPEType(pefile_object) event_data.section_names = self._GetSectionNames(pefile_object) timestamp = getattr(pefile_object.FILE_HEADER, 'TimeDateStamp', None) if timestamp: date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp) else: date_time = dfdatetime_semantic_time.NotSet() event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_CREATION) parser_mediator.ProduceEventWithEventData(event, event_data) self._ParseExportTable(parser_mediator, pefile_object, event_data) self._ParseImportTable(parser_mediator, pefile_object, event_data) self._ParseLoadConfigurationTable(parser_mediator, pefile_object, event_data) self._ParseDelayImportTable(parser_mediator, pefile_object, event_data) self._ParseResourceSection(parser_mediator, pefile_object, event_data)
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses an UTMPX file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ if not self._VerifyStructure(file_object): raise errors.UnableToParseFile( 'The file is not an UTMPX file.') while self._ReadEntry(parser_mediator, file_object): pass
def ParseRecord(self, mediator, key, structure): """Parses a record and produces a Bash history event. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: UnableToParseFile: if an unsupported key is provided. """ if key != u'log_entry': raise errors.UnableToParseFile( u'Unsupported key: {0:s}'.format(key)) event = BashHistoryEvent(structure.timestamp, structure.command) mediator.ProduceEvent(event)
def _GetFirefoxConfig(self, file_object, display_name):
  """Determines the cache file block size.

  Args:
    file_object (dfvfs.FileIO): a file-like object.
    display_name (str): display name.

  Returns:
    FIREFOX_CACHE_CONFIG: namedtuple with the determined block size and the
        offset of the first valid cache record.

  Raises:
    UnableToParseFile: if no valid cache record could be found.
  """
  # There ought to be a valid record within the first 4 MiB. We use this
  # limit to prevent reading large invalid files.
  to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

  while file_object.get_offset() < to_read:
    offset = file_object.get_offset()

    try:
      # We have not yet determined the block size, so we use the smallest
      # possible size.
      cache_entry, _ = self._ReadCacheEntry(
          file_object, display_name, self._MINIMUM_BLOCK_SIZE)

      record_size = (
          self._CACHE_ENTRY_HEADER_SIZE + cache_entry.request_size +
          cache_entry.information_size)

      if record_size >= 4096:
        # _CACHE_003_
        block_size = 4096
      elif record_size >= 1024:
        # _CACHE_002_
        block_size = 1024
      else:
        # _CACHE_001_
        block_size = 256

      return self.FIREFOX_CACHE_CONFIG(block_size, offset)

    except IOError:
      logger.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
          self.NAME, display_name, offset))

  raise errors.UnableToParseFile(
      'Could not find a valid cache record. Not a Firefox cache file.')
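# A standalone sketch of the block-size inference above: the total record
# size determines which cache file variant the record came from. The header
# size below is an assumed stand-in for self._CACHE_ENTRY_HEADER_SIZE.
CACHE_ENTRY_HEADER_SIZE = 36  # assumption for illustration only

def infer_block_size(request_size, information_size):
  """Maps a cache record size to the corresponding block size."""
  record_size = CACHE_ENTRY_HEADER_SIZE + request_size + information_size
  if record_size >= 4096:
    return 4096  # _CACHE_003_
  if record_size >= 1024:
    return 1024  # _CACHE_002_
  return 256  # _CACHE_001_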
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Windows XML EventLog (EVTX) file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ evtx_file = pyevtx.file() evtx_file.set_ascii_codepage(parser_mediator.codepage) try: evtx_file.open_file_object(file_object) except IOError as exception: evtx_file.close() raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( self.NAME, parser_mediator.GetDisplayName(), exception)) for record_index in range(0, evtx_file.number_of_records): try: evtx_record = evtx_file.get_record(record_index) event_object = WinEvtxRecordEvent(evtx_record) parser_mediator.ProduceEvent(event_object) except IOError as exception: logging.warning(( u'[{0:s}] unable to parse event record: {1:d} in file: {2:s} ' u'with error: {3:s}').format( self.NAME, record_index, parser_mediator.GetDisplayName(), exception)) for record_index in range(0, evtx_file.number_of_recovered_records): try: evtx_record = evtx_file.get_recovered_record(record_index) event_object = WinEvtxRecordEvent(evtx_record, recovered=True) parser_mediator.ProduceEvent(event_object) except IOError as exception: logging.debug(( u'[{0:s}] unable to parse recovered event record: {1:d} in file: ' u'{2:s} with error: {3:s}').format( self.NAME, record_index, parser_mediator.GetDisplayName(), exception)) evtx_file.close()
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses an OLE Compound File (OLECF) file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ olecf_file = pyolecf.file() olecf_file.set_ascii_codepage(parser_mediator.codepage) try: olecf_file.open_file_object(file_object) except IOError as exception: raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( self.NAME, parser_mediator.GetDisplayName(), exception)) # Get a list of all root items from the OLE CF file. root_item = olecf_file.root_item item_names = [item.name for item in root_item.sub_items] # Compare the list of available plugins. # We will try to use every plugin against the file (except # the default plugin) and run it. Only if none of the plugins # works will we use the default plugin. parsed = False for plugin_object in self._plugins: try: plugin_object.UpdateChainAndProcess( parser_mediator, root_item=root_item, item_names=item_names) except errors.WrongPlugin: logging.debug( u'[{0:s}] plugin: {1:s} cannot parse the OLECF file: {2:s}'.format( self.NAME, plugin_object.NAME, parser_mediator.GetDisplayName())) # Check if we still haven't parsed the file, and if so we will use # the default OLECF plugin. if not parsed and self._default_plugin: self._default_plugin.UpdateChainAndProcess( parser_mediator, root_item=root_item, item_names=item_names) olecf_file.close()
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses an Opera global history file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ text_file_object = text_file.TextFile(file_object) if not self._ParseAndValidateRecord(parser_mediator, text_file_object): raise errors.UnableToParseFile( u'Unable to parse as Opera global_history.dat.') while self._ParseRecord(parser_mediator, text_file_object): pass
def Parse(self, parser_mediator, **kwargs): """Parsers the file entry and extracts event objects. Args: parser_mediator: a parser mediator object (instance of ParserMediator). Raises: UnableToParseFile: when the file cannot be parsed. """ file_entry = parser_mediator.GetFileEntry() if not file_entry: raise errors.UnableToParseFile(u'Invalid file entry') parser_mediator.AppendToParserChain(self) try: self.ParseFileEntry(parser_mediator, file_entry, **kwargs) finally: parser_mediator.PopFromParserChain()
def ParseCacheEntry(self, file_object, block_offset):
  """Parses a cache entry.

  Args:
    file_object (dfvfs.FileIO): a file-like object to read from.
    block_offset (int): block offset of the cache entry.

  Returns:
    CacheEntry: cache entry.

  Raises:
    ParseError: if the cache entry cannot be read.
  """
  cache_entry_map = self._GetDataTypeMap('chrome_cache_entry')

  try:
    cache_entry, _ = self._ReadStructureFromFileObject(
        file_object, block_offset, cache_entry_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.ParseError((
        'Unable to parse cache entry at offset: 0x{0:08x} with error: '
        '{1!s}').format(block_offset, exception))

  cache_entry_object = CacheEntry()

  cache_entry_object.hash = cache_entry.hash
  cache_entry_object.next = CacheAddress(cache_entry.next_address)
  cache_entry_object.rankings_node = CacheAddress(
      cache_entry.rankings_node_address)
  cache_entry_object.creation_time = cache_entry.creation_time

  byte_array = cache_entry.key
  byte_string = bytes(bytearray(byte_array))
  cache_entry_object.key, _, _ = byte_string.partition(b'\x00')

  try:
    cache_entry_object.original_url = cache_entry_object.key.decode('ascii')
  except UnicodeDecodeError as exception:
    raise errors.ParseError(
        'Unable to decode original URL in key with error: {0!s}'.format(
            exception))

  return cache_entry_object
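# A minimal sketch of the key handling above: the cache entry key is a
# NUL-terminated byte array and the original URL is its ASCII decoding.
def extract_original_url(key_bytes):
  """Returns the original URL stored in a NUL-terminated key, or None."""
  key, _, _ = bytes(key_bytes).partition(b'\x00')
  try:
    return key.decode('ascii')
  except UnicodeDecodeError:
    return None

# For example, extract_original_url(b'http://example.com\x00\xff\xff')
# returns 'http://example.com'.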
def GetTopLevel(self, file_object): """Returns the deserialized content of a plist as a dictionary object. Args: file_object (dfvfs.FileIO): a file-like object to parse. Returns: dict[str, object]: contents of the plist. Raises: UnableToParseFile: when the file cannot be parsed. """ try: top_level_object = plistlib.load(file_object) except plistlib.InvalidFileException as exception: raise errors.UnableToParseFile( 'Unable to parse plist with error: {0!s}'.format(exception)) return top_level_object
def ParseRecord(self, parser_mediator, key, structure): """Parses a record and produces a Zsh history event. Args: parser_mediator: a parser mediator object (instance of ParserMediator). key: an string indicating the name of the parsed structure. structure: the elements parsed from the file (instance of pyparsing.ParseResults). Raises: UnableToParseFile: if an unsupported key is provided. """ if key != u'command': raise errors.UnableToParseFile(u'Unsupported key {0:s}'.format(key)) event_object = ZshHistoryEvent( structure[u'timestamp'], structure[u'elapsed_seconds'], structure[u'command']) parser_mediator.ProduceEvent(event_object)
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Mac OS X keychain file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ file_object.seek(0, os.SEEK_SET) table_offsets = self._VerifyStructure(file_object) if not table_offsets: raise errors.UnableToParseFile(u'The file is not a Keychain file.') for table_offset in table_offsets: # Skipping X bytes, unknown data at this point. file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR) try: table = self.TABLE_HEADER.parse_stream(file_object) except construct.FieldError as exception: logging.warning( (u'[{0:s}] Unable to parse table header in file: {1:s} ' u'with error: {2:s}.').format( self.NAME, parser_mediator.GetDisplayName(), exception)) continue # Table_offset: absolute byte in the file where the table starts. # table.first_record: first record in the table, relative to the # first byte of the table. file_object.seek( table_offset + table.first_record - file_object.tell(), os.SEEK_CUR) if table.record_type == self.RECORD_TYPE_INTERNET: for _ in range(table.number_of_records): self._ReadEntryInternet(parser_mediator, file_object) elif table.record_type == self.RECORD_TYPE_APPLICATION: for _ in range(table.number_of_records): self._ReadEntryApplication(parser_mediator, file_object)
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses an UTMPX file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: The file-like object to extract data from. Raises: UnableToParseFile: when the file cannot be parsed. """ if not self._VerifyStructure(file_object): raise errors.UnableToParseFile(u'The file is not an UTMPX file.') event_object = self._ReadEntry(file_object) while event_object: event_object.offset = file_object.tell() parser_mediator.ProduceEvent(event_object) event_object = self._ReadEntry(file_object)
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Firefox cache file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ filename = parser_mediator.GetFilename() display_name = parser_mediator.GetDisplayName() try: # Match cache filename. Five hex characters + 'm' + two digit # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd' # instead contain data only. self._CACHE_FILENAME.parseString(filename) except pyparsing.ParseException: if not filename.startswith(u'_CACHE_00'): raise errors.UnableToParseFile(u'Not a Firefox cache1 file.') firefox_config = self._GetFirefoxConfig(file_object, display_name) file_object.seek(firefox_config.first_record_offset) while file_object.get_offset() < file_object.get_size(): try: fetched, modified, expire = self._NextRecord( file_object, display_name, firefox_config.block_size) parser_mediator.ProduceEvent(fetched) if modified: parser_mediator.ProduceEvent(modified) if expire: parser_mediator.ProduceEvent(expire) except IOError: file_offset = file_object.get_offset( ) - self._MINUMUM_BLOCK_SIZE logging.debug( (u'[{0:s}] Invalid cache record in file: {1:s} at offset: ' u'{2:d}.').format(self.NAME, display_name, file_offset))
def Parse(self, parser_context, file_entry): """Extract data from a Keychain file. Args: parser_context: A parser context object (instance of ParserContext). file_entry: A file entry object (instance of dfvfs.FileEntry). """ file_object = file_entry.GetFileObject() table_offsets = self._VerifyStructure(file_object) if not table_offsets: file_object.close() raise errors.UnableToParseFile(u'The file is not a Keychain file.') for table_offset in table_offsets: # Skipping X bytes, unknown data at this point. file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR) try: table = self.TABLE_HEADER.parse_stream(file_object) except construct.FieldError as exception: logging.warning(( u'[{0:s}] Unable to parse table header in file: {1:s} ' u'with error: {2:s}.').format( self.NAME, parser_context.GetDisplayName(file_entry), exception)) continue # Table_offset: absolute byte in the file where the table starts. # table.first_record: first record in the table, relative to the # first byte of the table. file_object.seek( table_offset + table.first_record - file_object.tell(), os.SEEK_CUR) if table.record_type == self.RECORD_TYPE_INTERNET: for _ in range(table.number_of_records): self._ReadEntryInternet( parser_context, file_object, file_entry=file_entry) elif table.record_type == self.RECORD_TYPE_APPLICATION: for _ in range(table.number_of_records): self._ReadEntryApplication( parser_context, file_object, file_entry=file_entry) file_object.close()
def Parse(self, file_entry): """Extract data from a Keychain file. Args: file_entry: a file entry object. Yields: An event object (instance of KeychainInternetRecordEvent) for each record. """ file_object = file_entry.GetFileObject() table_offsets = self._VerifyStructure(file_object) if not table_offsets: raise errors.UnableToParseFile(u'The file is not a Keychain file.') for table_offset in table_offsets: # Skipping X bytes, unknown data at this point. file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR) try: table = self.TABLE_HEADER.parse_stream(file_object) except construct.FieldError as exception: logging.warning( (u'Unable to parse table header, moving to the next one, ' u'reason: {:s}').format(exception)) continue # Table_offset: absolute byte in the file where the table starts. # table.first_record: first record in the table, relative to the # first byte of the table. file_object.seek( table_offset + table.first_record - file_object.tell(), os.SEEK_CUR) if table.record_type == self.RECORD_TYPE_INTERNET: for _ in range(table.number_of_records): for object_record in self._ReadEntryInternet(file_object): yield object_record elif table.record_type == self.RECORD_TYPE_APPLICATION: for _ in range(table.number_of_records): for object_record in self._ReadEntryApplication( file_object): yield object_record file_object.close()
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Windows XML EventLog (EVTX) file-like object. Args: parser_mediator: a parser mediator object (instance of ParserMediator). file_object: a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ evtx_file = pyevtx.file() evtx_file.set_ascii_codepage(parser_mediator.codepage) try: evtx_file.open_file_object(file_object) except IOError as exception: display_name = parser_mediator.GetDisplayName() raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( self.NAME, display_name, exception)) for record_index, evtx_record in enumerate(evtx_file.records): try: self._ParseRecord(parser_mediator, record_index, evtx_record) except IOError as exception: parser_mediator.ProduceParseError( u'unable to parse event record: {0:d} with error: {1:s}'. format(record_index, exception)) for record_index, evtx_record in enumerate( evtx_file.recovered_records): try: self._ParseRecord(parser_mediator, record_index, evtx_record, recovered=True) except IOError as exception: parser_mediator.ProduceParseError(( u'unable to parse recovered event record: {0:d} with error: ' u'{1:s}').format(record_index, exception)) evtx_file.close()
def Parse(self, file_entry): """Extract data from a Windows XML EventLog (EVTX) file. Args: file_entry: A file entry object. Yields: An event object (WinEvtxRecordEvent) that contains the parsed data. """ file_object = file_entry.GetFileObject() evtx_file = pyevtx.file() evtx_file.set_ascii_codepage(self._codepage) try: evtx_file.open_file_object(file_object) except IOError as exception: raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( self.parser_name, file_entry.name, exception)) for record_index in range(0, evtx_file.number_of_records): try: evtx_record = evtx_file.get_record(record_index) yield WinEvtxRecordEvent(evtx_record) except IOError as exception: logging.warning(( u'[{0:s}] unable to parse event record: {1:d} in file: {2:s} ' u'with error: {3:s}').format( self.parser_name, record_index, file_entry.name, exception)) for record_index in range(0, evtx_file.number_of_recovered_records): try: evtx_record = evtx_file.get_recovered_record(record_index) yield WinEvtxRecordEvent(evtx_record, recovered=True) except IOError as exception: logging.debug(( u'[{0:s}] unable to parse recovered event record: {1:d} in file: ' u'{2:s} with error: {3:s}').format( self.parser_name, record_index, file_entry.name, exception)) file_object.close()
def Parse(self, parser_mediator, file_object, **kwargs): """Parses a single file-like object. Args: parser_mediator: a parser mediator object (instance of ParserMediator). file_object: a file-like object to parse. Raises: UnableToParseFile: when the file cannot be parsed. """ if not file_object: raise errors.UnableToParseFile(u'Invalid file object') if self._INITIAL_FILE_OFFSET is not None: file_object.seek(self._INITIAL_FILE_OFFSET, os.SEEK_SET) parser_mediator.AppendToParserChain(self) try: self.ParseFileObject(parser_mediator, file_object, **kwargs) finally: parser_mediator.PopFromParserChain()
def GetTopLevel(self, file_object): """Returns the deserialized content of a plist as a dictionary object. Args: file_object (dfvfs.FileIO): a file-like object to parse. Returns: dict[str, object]: contents of the plist. Raises: UnableToParseFile: when the file cannot be parsed. """ try: return plistlib.load(file_object) except (AttributeError, LookupError, binascii.Error, expat.ExpatError, plistlib.InvalidFileException) as exception: # LookupError will be raised in cases where the plist is an XML file # that contains an unsupported encoding. raise errors.UnableToParseFile( 'Unable to parse plist with error: {0!s}'.format(exception))
def Parse(self, file_entry): """Extract data from a UTMPX file. Args: file_entry: a file entry object. Returns: An event object (instance of UtmpxMacOsXEvent) for each logon/logoff event. """ file_object = file_entry.GetFileObject() if not self._VerifyStructure(file_object): raise errors.UnableToParseFile(u'The file is not an UTMPX file.') event_object = self._ReadEntry(file_object) while event_object: event_object.offset = file_object.tell() yield event_object event_object = self._ReadEntry(file_object) file_object.close()
def ExtractEvents(self, parser_mediator, registry_key, **kwargs):
  """Extract events from a Windows Registry key.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key.
  """
  event_data = WindowsFileHistoryUsageEventData()
  event_data.key_path = registry_key.path

  date_time = None
  for registry_value in registry_key.GetValues():
    name = registry_value.name or ''

    try:
      if 'ProtectedUpToTime' in name:
        last_backup_time = int(registry_value.GetDataAsObject())
        date_time = dfdatetime_filetime.Filetime(timestamp=last_backup_time)
        event_data.last_backup_time = date_time.CopyToDateTimeString()

      elif 'TargetChanged' in name:
        target_changed = int(registry_value.GetDataAsObject())
        if target_changed == 0:
          event_data.target_changed = 'Backup storage not changed'
        else:
          event_data.target_changed = 'Backup storage changed'

    except (TypeError, ValueError):
      parser_mediator.ProduceExtractionWarning(
          'unable to parse FileHistory usage value: {0:s}'.format(name))
      return

  # Only produce an event when a last backup time was found, otherwise
  # date_time would be undefined.
  if date_time is None:
    return

  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_BACKUP)
  parser_mediator.ProduceEventWithEventData(event, event_data)
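# A minimal sketch of the FILETIME handling above, using only the
# dfdatetime calls that appear in the plugin; the timestamp value is
# illustrative.
from dfdatetime import filetime as dfdatetime_filetime

# FILETIME counts 100-nanosecond intervals since January 1, 1601.
last_backup_time = 132000000000000000
date_time = dfdatetime_filetime.Filetime(timestamp=last_backup_time)

# CopyToDateTimeString() renders the timestamp as a date and time string.
print(date_time.CopyToDateTimeString())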
def Parse(self, parser_context, file_entry): """Extracts data from an ESE database File. Args: parser_context: A parser context object (instance of ParserContext). file_entry: A file entry object (instance of dfvfs.FileEntry). Yields: An event event (instance of EventObject) that contains the parsed values. """ file_object = file_entry.GetFileObject() esedb_file = pyesedb.file() try: esedb_file.open_file_object(file_object) except IOError as exception: file_object.close() raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( self.NAME, file_entry.name, exception)) # Compare the list of available plugins. cache = EseDbCache() for plugin_object in self._plugins: try: plugin_object.Process( parser_context, database=esedb_file, cache=cache) except errors.WrongPlugin: logging.debug(( u'[{0:s}] plugin: {1:s} cannot parse the ESE database: ' u'{2:s}').format( self.NAME, plugin_object.NAME, file_entry.name)) # TODO: explicitly clean up cache. esedb_file.close() file_object.close()
def ParseRecord(self, mediator, key, structure): """Parses a matching entry. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: UnableToParseFile: if an unsupported key is provided. """ if key not in self._SUPPORTED_KEYS: raise errors.UnableToParseFile( u'Unsupported key: {0:s}'.format(key)) timestamp = timelib.Timestamp.FromTimeParts(year=structure.year, month=structure.month, day=structure.day, hour=structure.hour, minutes=structure.minute, seconds=structure.second, timezone=mediator.timezone) reporter = structure.reporter attributes = {u'body': structure.body} plugin_object = self._plugin_objects_by_reporter.get(reporter, None) if not plugin_object: event_object = Exim4LineEvent(timestamp, 0, attributes) mediator.ProduceEvent(event_object) else: try: plugin_object.Process(mediator, timestamp, attributes) except errors.WrongPlugin: event_object = Exim4LineEvent(timestamp, 0, attributes) mediator.ProduceEvent(event_object)
def Parse(self, parser_context, file_entry): """Extract data from a UTMPX file. Args: parser_context: A parser context object (instance of ParserContext). file_entry: A file entry object (instance of dfvfs.FileEntry). """ file_object = file_entry.GetFileObject() if not self._VerifyStructure(file_object): file_object.close() raise errors.UnableToParseFile(u'The file is not an UTMPX file.') event_object = self._ReadEntry(file_object) while event_object: event_object.offset = file_object.tell() parser_context.ProduceEvent(event_object, parser_name=self.NAME, file_entry=file_entry) event_object = self._ReadEntry(file_object) file_object.close()
def _ParseLayerConfigJSON(self, parser_mediator, file_object):
  """Extracts events from a Docker filesystem layer configuration file.

  The path of each filesystem layer config file is:
  DOCKER_DIR/graph/<layer_id>/json

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file is not a valid layer config file.
  """
  file_content = file_object.read()
  file_content = codecs.decode(file_content, self._ENCODING)

  json_dict = json.loads(file_content)

  if 'docker_version' not in json_dict:
    raise errors.UnableToParseFile(
        'not a valid Docker layer configuration file, missing '
        '\'docker_version\' key.')

  if 'created' in json_dict:
    layer_creation_command_array = [
        x.strip() for x in json_dict['container_config']['Cmd']]
    layer_creation_command = ' '.join(
        layer_creation_command_array).replace('\t', '')

    event_data = DockerJSONLayerEventData()
    event_data.command = layer_creation_command
    event_data.layer_id = self._GetIdentifierFromPath(parser_mediator)

    timestamp = timelib.Timestamp.FromTimeString(json_dict['created'])
    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)
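# A runnable sketch of the layer-configuration handling above, against an
# in-memory sample instead of a dfvfs file object. The sample mirrors the
# keys the parser checks: 'docker_version', 'created' and the command under
# 'container_config'.
import json

SAMPLE = json.loads("""
{
  "docker_version": "1.9.1",
  "created": "2016-01-07T16:49:08.674873-08:00",
  "container_config": {"Cmd": ["/bin/sh", "-c", "  apt-get update\\t"]}
}
""")

command = ' '.join(
    part.strip() for part in SAMPLE['container_config']['Cmd']).replace(
        '\t', '')
# command == '/bin/sh -c apt-get update'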
def _GetCacheFileMetadataHeaderOffset(self, file_object):
  """Determines the offset of the cache file metadata header.

  This method is inspired by the work of James Habben:
  https://github.com/JamesHabben/FirefoxCache2

  Args:
    file_object (dfvfs.FileIO): a file-like object.

  Returns:
    int: offset of the cache file metadata header relative to the start
        of the file.

  Raises:
    UnableToParseFile: if the size of the cache file metadata cannot be
        determined.
  """
  file_object.seek(-4, os.SEEK_END)
  file_offset = file_object.tell()

  metadata_size_map = self._GetDataTypeMap('uint32be')

  try:
    metadata_size, _ = self._ReadStructureFromFileObject(
        file_object, file_offset, metadata_size_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.UnableToParseFile(
        'Unable to parse cache file metadata size with error: {0!s}'.format(
            exception))

  # Firefox splits the content into chunks.
  number_of_chunks, remainder = divmod(metadata_size, self._CHUNK_SIZE)
  if remainder != 0:
    number_of_chunks += 1

  # Each chunk in the cached record is padded with two bytes.
  # Skip the first 4 bytes which contain a hash value of the cached content.
  return metadata_size + (number_of_chunks * 2) + 4
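# A worked example of the offset arithmetic above. The chunk size is defined
# elsewhere on the class; 256 KiB is assumed here for illustration. A
# 600000-byte cached content spans three chunks, each chunk is padded with
# two bytes, and the metadata header follows a 4-byte hash of the content.
CHUNK_SIZE = 256 * 1024  # assumed value of self._CHUNK_SIZE

metadata_size = 600000
number_of_chunks, remainder = divmod(metadata_size, CHUNK_SIZE)
if remainder != 0:
  number_of_chunks += 1

offset = metadata_size + (number_of_chunks * 2) + 4
# number_of_chunks == 3, so offset == 600000 + 6 + 4 == 600010.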
def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Firefox cache file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ filename = parser_mediator.GetFilename() display_name = parser_mediator.GetDisplayName() try: # Match cache filename. Five hex characters + 'm' + two digit # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd' # instead contain data only. self._CACHE_FILENAME.parseString(filename) except pyparsing.ParseException: if not filename.startswith('_CACHE_00'): raise errors.UnableToParseFile('Not a Firefox cache1 file.') firefox_config = self._GetFirefoxConfig(file_object, display_name) file_object.seek(firefox_config.first_record_offset) while file_object.get_offset() < file_object.get_size(): try: self._ParseCacheEntry(parser_mediator, file_object, display_name, firefox_config.block_size) except IOError: file_offset = file_object.get_offset( ) - self._MINUMUM_BLOCK_SIZE logging.debug( ('[{0:s}] Invalid cache record in file: {1:s} at offset: ' '{2:d}.').format(self.NAME, display_name, file_offset))