def Process(self, parser_mediator, zip_file, archive_members):
  """Determines if this is the correct plugin; if so proceed with processing.

  This method checks if the ZIP file being processed contains the paths
  specified in REQUIRED_PATHS. If all paths are present, the plugin logic
  processing continues in InspectZipFile.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    zip_file (zipfile.ZipFile): the ZIP file. It should not be closed in
        this method, but will be closed by the parser logic in czip.py.
    archive_members (list[str]): file paths in the archive.

  Raises:
    ValueError: if a subclass has not specified REQUIRED_PATHS.
    WrongCompoundZIPPlugin: if the archive does not contain all of the
        paths in REQUIRED_PATHS, meaning this plugin cannot process the
        given file.
  """
  if not self.REQUIRED_PATHS:
    raise ValueError('REQUIRED_PATHS not specified')

  # All required paths must be present in the archive for this plugin to
  # claim the file.
  if not set(archive_members).issuperset(self.REQUIRED_PATHS):
    raise errors.WrongCompoundZIPPlugin(self.NAME)

  logger.debug('Compound ZIP Plugin used: {0:s}'.format(self.NAME))

  self.InspectZipFile(parser_mediator, zip_file)
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the MacOS securityd log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  # Reset the parser state before verification.
  self._last_month = 0
  self._year_use = parser_mediator.GetEstimatedYear()

  parsed_line = None
  try:
    parsed_line = self.SECURITYD_LINE.parseString(line)
  except pyparsing.ParseException:
    pass

  if parsed_line is None:
    logger.debug('Not a MacOS securityd log file')
    return False

  elements = self._GetTimeElementsTuple(parsed_line)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElements(time_elements_tuple=elements)
  except ValueError:
    logger.debug(
        'Not a MacOS securityd log file, invalid date and time: {0!s}'.format(
            parsed_line.date_time))
    return False

  # Remember the month to detect year roll-over on subsequent lines.
  self._last_month = elements[1]
  return True
def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
  """Parses an isolated non-header line.

  The line is attributed to the date, time and offset of the most recently
  seen event, since it carries no timestamp of its own.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    structure (pyparsing.ParseResults): structure of tokens derived from
        a line of a text file.
  """
  if not self._last_event_data:
    logger.debug('SkyDrive, found isolated line with no previous events')
    return

  isolated_event = time_events.DateTimeValuesEvent(
      self._last_date_time, definitions.TIME_DESCRIPTION_ADDED)

  isolated_event_data = SkyDriveOldLogEventData()
  isolated_event_data.offset = self._last_event_data.offset
  isolated_event_data.text = structure.text

  parser_mediator.ProduceEventWithEventData(
      isolated_event, isolated_event_data)

  # TODO: think about a possible refactoring for the non-header lines.
  self._last_date_time = None
  self._last_event_data = None
def VerifyStructure(self, parser_mediator, lines):
  """Checks whether the lines match the Google Drive Sync log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    lines (str): one or more lines from the text file.

  Returns:
    bool: True if this is the correct parser, False otherwise.
  """
  try:
    structure = self._GDS_LINE.parseString(lines)
  except pyparsing.ParseException as exception:
    logger.debug('Not a Google Drive Sync log file: {0!s}'.format(exception))
    return False

  date_time_string = self._GetValueFromStructure(structure, 'date_time')
  date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()

  # Converting the value to ISO 8601 and copying it into the date time
  # object validates the date and time.
  try:
    date_time.CopyFromStringISO8601(self._GetISO8601String(date_time_string))
  except ValueError as exception:
    logger.debug((
        'Not a Google Drive Sync log file, invalid date/time: {0!s} '
        'with error: {1!s}').format(date_time_string, exception))
    return False

  return True
def VerifyStructure(self, parser_mediator, lines):
  """Checks whether the lines match the SkyDrive log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    lines (str): one or more lines from the text file.

  Returns:
    bool: True if this is the correct parser, False otherwise.
  """
  header_structure = None
  try:
    header_structure = self._SDF_HEADER.parseString(lines)
  except pyparsing.ParseException:
    pass

  if header_structure is None:
    logger.debug('Not a SkyDrive log file')
    return False

  date_time_values = self._GetValueFromStructure(
      header_structure, 'header_date_time')

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=date_time_values)
  except ValueError:
    logger.debug(
        'Not a SkyDrive log file, invalid date and time: {0!s}'.format(
            date_time_values))
    return False

  return True
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a SkyDrive old log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._LINE.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a SkyDrive old log file')
    return False

  time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
  # Guard against a missing date_time value, which previously raised an
  # uncaught TypeError when unpacked below.
  if not time_elements_tuple:
    logger.debug('Not a SkyDrive old log file, missing date and time.')
    return False

  # The log stores the date day-first; reorder to the year-first order
  # expected by dfdatetime.
  day_of_month, month, year, hours, minutes, seconds, milliseconds = (
      time_elements_tuple)

  time_elements_tuple = (
      year, month, day_of_month, hours, minutes, seconds, milliseconds)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug(
        'Not a SkyDrive old log file, invalid date and time: {0!s}'.format(
            time_elements_tuple))
    return False

  return True
def VerifyStructure(self, parser_mediator, lines):
  """Checks whether the lines match the SkyDrive log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    lines (str): one or more lines from the text file.

  Returns:
    bool: True if this is the correct parser, False otherwise.
  """
  try:
    header = self._SDF_HEADER.parseString(lines)
  except pyparsing.ParseException:
    logger.debug('Not a SkyDrive log file')
    return False

  # Constructing the date time object validates the header time elements.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=header.header_date_time)
  except ValueError:
    logger.debug(
        'Not a SkyDrive log file, invalid date and time: {0!s}'.format(
            header.header_date_time))
    return False

  return True
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the MacOS securityd log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  # Reset the parser state before verification.
  self._last_month = 0
  self._year_use = parser_mediator.GetEstimatedYear()

  try:
    structure = self.SECURITYD_LINE.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a MacOS securityd log file')
    return False

  date_time_values = self._GetTimeElementsTuple(structure)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=date_time_values)
  except ValueError:
    logger.debug(
        'Not a MacOS securityd log file, invalid date and time: {0!s}'.format(
            date_time_values))
    return False

  # Remember the month to detect year roll-over on subsequent lines.
  self._last_month = date_time_values[1]
  return True
def Process(self, parser_mediator, zip_file, archive_members):
  """Determines if this is the correct plugin; if so proceed with processing.

  The plugin claims the file only when every path in REQUIRED_PATHS is
  present in the archive, after which processing continues in
  InspectZipFile.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    zip_file (zipfile.ZipFile): the ZIP file. It should not be closed in
        this method, but will be closed by the parser logic in czip.py.
    archive_members (list[str]): file paths in the archive.

  Raises:
    ValueError: if a subclass has not specified REQUIRED_PATHS.
    WrongCompoundZIPPlugin: if this plugin is not able to process the given
        file.
  """
  if not self.REQUIRED_PATHS:
    raise ValueError('REQUIRED_PATHS not specified')

  required_paths = set(self.REQUIRED_PATHS)
  if not required_paths.issubset(set(archive_members)):
    raise errors.WrongCompoundZIPPlugin(self.NAME)

  logger.debug('Compound ZIP Plugin used: {0:s}'.format(self.NAME))

  self.InspectZipFile(parser_mediator, zip_file)
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the vsftpd log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfVFS.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._LOG_LINE.parseString(line)
  except pyparsing.ParseException:
    return False

  # Both marker substrings must be present for a vsftpd log line.
  if ' [pid ' not in line or ': Client ' not in line:
    return False

  date_time_values = self._GetTimeElementsTuple(structure)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=date_time_values)
  except ValueError:
    logger.debug(('Not a vsftpd log file, invalid date and time: '
                  '{0!s}').format(date_time_values))
    return False

  return True
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the XChat scrollback log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line was successfully parsed.
  """
  try:
    structure = self.LOG_LINE.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a XChat scrollback log file')
    return False

  posix_timestamp = self._GetValueFromStructure(structure, 'timestamp')

  # The timestamp must be a base-10 integer string.
  try:
    int(posix_timestamp, 10)
  except (TypeError, ValueError):
    logger.debug('Not a XChat scrollback log file, invalid timestamp.')
    return False

  return True
def _ParseHeader(self, parser_mediator, file_object):
  """Parses a CUPS IPP header from a file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): file-like object.

  Raises:
    UnableToParseFile: when the header cannot be parsed.
  """
  data_type_map = self._GetDataTypeMap('cups_ipp_header')

  # The header is read from the start (offset 0) of the file.
  try:
    header, _ = self._ReadStructureFromFileObject(
        file_object, 0, data_type_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.UnableToParseFile(
        '[{0:s}] Unable to parse header with error: {1!s}'.format(
            self.NAME, exception))

  format_version = '{0:d}.{1:d}'.format(
      header.major_version, header.minor_version)
  if format_version not in self._SUPPORTED_FORMAT_VERSIONS:
    raise errors.UnableToParseFile(
        '[{0:s}] Unsupported format version {1:s}.'.format(
            self.NAME, format_version))

  # A non-standard operation identifier is only logged, not treated as
  # fatal.
  if header.operation_identifier != 5:
    # TODO: generate ExtractionWarning instead of printing debug output.
    display_name = parser_mediator.GetDisplayName()
    logger.debug((
        '[{0:s}] Non-standard operation identifier: 0x{1:08x} in file header '
        'of: {2:s}.').format(
            self.NAME, header.operation_identifier, display_name))
def _ParseKey(self, parser_mediator, registry_key):
  """Parses the Registry key with a specific plugin.

  A plugin is selected by key path first, then by per-plugin checks, and
  finally falls back to the default plugin, if set.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key.
  """
  logger.debug('Parsing Windows Registry key: {0:s}'.format(
      registry_key.path))

  normalized_key_path = self._NormalizeKeyPath(registry_key.path)

  if self._path_filter.CheckPath(normalized_key_path):
    matching_plugin = self._plugin_per_key_path[normalized_key_path]
  else:
    # Fall back to plugins that select keys by other criteria than path.
    matching_plugin = next((
        plugin for plugin in self._plugins_without_key_paths
        if self._CanProcessKeyWithPlugin(registry_key, plugin)), None)

  if not matching_plugin:
    matching_plugin = self._default_plugin

  if matching_plugin:
    self._ParseKeyWithPlugin(parser_mediator, registry_key, matching_plugin)
def Process(self, parser_mediator, data, **kwargs):
  """Determine if this is the correct plugin; if so proceed with processing.

  The plugin claims the data only when every key in BENCODE_KEYS is present
  in the decoded bencode values; otherwise WrongBencodePlugin is raised.
  Processing then continues in self.GetEntries() which holds the logic
  implemented by the plugin.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    data (dict[str, object]): bencode data values.

  Raises:
    WrongBencodePlugin: If this plugin is not able to process the given file.
    ValueError: If top level is not set.
  """
  if data is None:
    raise ValueError('Data is not set.')

  required_keys = set(self.BENCODE_KEYS)
  if not required_keys.issubset(data.keys()):
    raise errors.WrongBencodePlugin(self.NAME)

  # This will raise if unhandled keyword arguments are passed.
  super(BencodePlugin, self).Process(parser_mediator)

  logger.debug('Bencode Plugin Used: {0:s}'.format(self.NAME))

  self.GetEntries(parser_mediator, data=data)
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a Firefox cache file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  filename = parser_mediator.GetFilename()

  # The filename must match either the cache filename pattern or the
  # '_CACHE_00' prefix.
  has_cache_filename = bool(self._CACHE_FILENAME_RE.match(filename))
  if not has_cache_filename and not filename.startswith('_CACHE_00'):
    raise errors.UnableToParseFile('Not a Firefox cache1 file.')

  display_name = parser_mediator.GetDisplayName()
  firefox_config = self._GetFirefoxConfig(file_object, display_name)

  file_object.seek(firefox_config.first_record_offset)

  # Parse cache entries until the end of the file; on an invalid record a
  # debug message is logged and parsing continues at the current offset.
  while file_object.get_offset() < file_object.get_size():
    try:
      self._ParseCacheEntry(
          parser_mediator, file_object, display_name,
          firefox_config.block_size)

    except IOError:
      record_offset = file_object.get_offset() - self._MINIMUM_BLOCK_SIZE
      logger.debug((
          '[{0:s}] Invalid cache record in file: {1:s} at offset: '
          '{2:d}.').format(self.NAME, display_name, record_offset))
def _ParseHeader(self, parser_mediator, file_object):
  """Parses a CUPS IPP header from a file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): file-like object.

  Raises:
    UnableToParseFile: when the header cannot be parsed or the format
        version is not supported.
  """
  header_map = self._GetDataTypeMap('cups_ipp_header')

  try:
    # The header is read from the start (offset 0) of the file.
    header, _ = self._ReadStructureFromFileObject(
        file_object, 0, header_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.UnableToParseFile(
        '[{0:s}] Unable to parse header with error: {1!s}'.format(
            self.NAME, exception))

  format_version = '{0:d}.{1:d}'.format(header.major_version,
                                        header.minor_version)
  if format_version not in self._SUPPORTED_FORMAT_VERSIONS:
    raise errors.UnableToParseFile(
        '[{0:s}] Unsupported format version {1:s}.'.format(
            self.NAME, format_version))

  # A non-standard operation identifier is only logged, not treated as
  # fatal.
  if header.operation_identifier != 5:
    # TODO: generate ExtractionWarning instead of printing debug output.
    display_name = parser_mediator.GetDisplayName()
    logger.debug((
        '[{0:s}] Non-standard operation identifier: 0x{1:08x} in file header '
        'of: {2:s}.').format(self.NAME, header.operation_identifier,
                             display_name))
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the SkyDrive old log format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._LINE.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a SkyDrive old log file')
    return False

  # The log stores the date day-first; reorder to the year-first order
  # expected by dfdatetime.
  day_of_month, month, year, hours, minutes, seconds, milliseconds = (
      structure.date_time)

  time_elements_tuple = (
      year, month, day_of_month, hours, minutes, seconds, milliseconds)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug(
        'Not a SkyDrive old log file, invalid date and time: {0!s}'.format(
            structure.date_time))
    return False

  return True
def VerifyStructure(self, parser_mediator, line):
  """Checks whether the line matches the XChat log header format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._HEADER.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a XChat log file')
    return False

  _, month_string, day, hours, minutes, seconds, year = structure.date_time

  # Convert the month name to its numeric value; unknown names map to 0.
  month = timelib.MONTH_DICT.get(month_string.lower(), 0)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=(year, month, day, hours, minutes, seconds))
  except ValueError:
    logger.debug('Not a XChat log file, invalid date and time: {0!s}'.format(
        structure.date_time))
    return False

  return True
def VerifyStructure(self, parser_mediator, lines):
  """Verify that this file is a Google Drive Sync log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    lines (str): one or more lines from the text file.

  Returns:
    bool: True if this is the correct parser, False otherwise.
  """
  try:
    structure = self._GDS_LINE.parseString(lines)
  except pyparsing.ParseException as exception:
    logger.debug(
        'Not a Google Drive Sync log file: {0!s}'.format(exception))
    return False

  date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()

  date_time_string = self._GetValueFromStructure(structure, 'date_time')
  try:
    # Converting the value to ISO 8601 and copying it into the date time
    # object validates the date and time.
    datetime_iso8601 = self._GetISO8601String(date_time_string)
    date_time.CopyFromStringISO8601(datetime_iso8601)
  except ValueError as exception:
    logger.debug(
        ('Not a Google Drive Sync log file, invalid date/time: {0!s} '
         'with error: {1!s}').format(date_time_string, exception))
    return False

  return True
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a Firefox cache file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  # The filename must match either the cache filename pattern or the
  # '_CACHE_00' prefix.
  filename = parser_mediator.GetFilename()
  if (not self._CACHE_FILENAME_RE.match(filename) and
      not filename.startswith('_CACHE_00')):
    raise errors.UnableToParseFile('Not a Firefox cache1 file.')

  display_name = parser_mediator.GetDisplayName()
  firefox_config = self._GetFirefoxConfig(file_object, display_name)

  file_object.seek(firefox_config.first_record_offset)

  # Parse cache entries until the end of the file; on an invalid record a
  # debug message is logged and parsing continues at the current offset.
  while file_object.get_offset() < file_object.get_size():
    try:
      self._ParseCacheEntry(
          parser_mediator, file_object, display_name,
          firefox_config.block_size)

    except IOError:
      file_offset = file_object.get_offset() - self._MINIMUM_BLOCK_SIZE
      logger.debug((
          '[{0:s}] Invalid cache record in file: {1:s} at offset: '
          '{2:d}.').format(self.NAME, display_name, file_offset))
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a XChat log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._HEADER.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a XChat log file')
    return False

  _, month, day, hours, minutes, seconds, year = structure.date_time

  # Convert the month name to its numeric value; unknown names map to 0.
  month = timelib.MONTH_DICT.get(month.lower(), 0)

  time_elements_tuple = (year, month, day, hours, minutes, seconds)

  # Constructing the date time object validates the time elements.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug(
        'Not a XChat log file, invalid date and time: {0!s}'.format(
            structure.date_time))
    return False

  return True
def _RecurseKey(self, plist_item, depth=15, key_path=''):
  """Flattens nested dictionaries and lists by yielding its values.

  The hierarchy of a plist file is a series of nested dictionaries and
  lists. This is a helper function helps plugins navigate the structure
  without having to reimplement their own recursive methods.

  This method implements an overridable depth limit to prevent processing
  extremely deeply nested plists. If the limit is reached a debug message
  is logged indicating which key processing stopped on.

  Example Input Plist:
    plist_item = { DeviceRoot: { DeviceMAC1: [Value1, Value2, Value3],
                                 DeviceMAC2: [Value1, Value2, Value3]}}

  Example Output:
    ('', DeviceRoot, {DeviceMACs...})
    (DeviceRoot, DeviceMAC1, [Value1, Value2, Value3])
    (DeviceRoot, DeviceMAC2, [Value1, Value2, Value3])

  Args:
    plist_item (object): plist item to be checked for additional nested
        items.
    depth (Optional[int]): current recursion depth. This value is used to
        ensure we stop at the maximum recursion depth.
    key_path (Optional[str]): path of the current working key.

  Yields:
    tuple[str, str, object]: key path, key name and value.
  """
  if depth < 1:
    # Fix: do not hard code "15" in the message, since the initial depth is
    # overridable by the caller.
    logger.debug(
        'Maximum recursion depth reached for key: {0:s}'.format(key_path))

  elif isinstance(plist_item, (list, tuple)):
    for sub_plist_item in plist_item:
      for subkey_values in self._RecurseKey(
          sub_plist_item, depth=depth - 1, key_path=key_path):
        yield subkey_values

  elif hasattr(plist_item, 'items'):
    for subkey_name, value in plist_item.items():
      yield key_path, subkey_name, value

      # Only dictionaries and lists can contain further nested items;
      # a dictionary value is wrapped in a list so both are handled by
      # the same loop below.
      if isinstance(value, dict):
        value = [value]
      elif not isinstance(value, (list, tuple)):
        continue

      for sub_plist_item in value:
        if isinstance(sub_plist_item, dict):
          subkey_path = '{0:s}/{1:s}'.format(key_path, subkey_name)
          for subkey_values in self._RecurseKey(
              sub_plist_item, depth=depth - 1, key_path=subkey_path):
            yield subkey_values
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a bencoded file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  header_data = file_object.read(2)
  if not self._BENCODE_RE.match(header_data):
    raise errors.UnableToParseFile('Not a valid Bencoded file.')

  bencode_file = BencodeFile()

  try:
    bencode_file.Open(file_object)
  except IOError as exception:
    # Fix: local variable was misspelled as "diplay_name".
    display_name = parser_mediator.GetDisplayName()
    raise errors.UnableToParseFile(
        '[{0:s}] unable to parse file: {1:s} with error: {2!s}'.format(
            self.NAME, display_name, exception))

  if not bencode_file.decoded_values:
    parser_mediator.ProduceExtractionWarning(
        'missing decoded Bencode values')
    return

  try:
    for plugin in self._plugins:
      if parser_mediator.abort:
        break

      file_entry = parser_mediator.GetFileEntry()
      display_name = parser_mediator.GetDisplayName(file_entry)

      # Skip plugins whose required keys are not present in the file.
      if not plugin.CheckRequiredKeys(bencode_file):
        logger.debug(
            'Skipped parsing file: {0:s} with plugin: {1:s}'.format(
                display_name, plugin.NAME))
        continue

      logger.debug('Parsing file: {0:s} with plugin: {1:s}'.format(
          display_name, plugin.NAME))

      try:
        plugin.UpdateChainAndProcess(
            parser_mediator, bencode_file=bencode_file)
      except Exception as exception:  # pylint: disable=broad-except
        parser_mediator.ProduceExtractionWarning((
            'plugin: {0:s} unable to parse Bencode file with error: '
            '{1!s}').format(plugin.NAME, exception))

  finally:
    # Always close the bencode file, even when a plugin fails.
    bencode_file.Close()
def RecurseKey(recur_item, depth=15, key_path=''):
  """Flattens nested dictionaries and lists by yielding their values.

  The hierarchy of a plist file is a series of nested dictionaries and
  lists. This helper lets plugins navigate the structure without having to
  reimplement their own recursive methods.

  A depth limit, overridable by the caller, prevents processing extremely
  deeply nested plists. When the limit is reached a debug message is logged
  indicating which key processing stopped on.

  Args:
    recur_item: An object to be checked for additional nested items.
    depth: Optional integer indication the current recursion depth. This
        value is used to ensure we stop at the maximum recursion depth.
    key_path: Optional path of the current working key.

  Yields:
    A tuple of the key path, key, and value from a plist.
  """
  if depth < 1:
    logger.debug('Recursion limit hit for key: {0:s}'.format(key_path))
    return

  if isinstance(recur_item, (list, tuple)):
    # Lists and tuples do not add a level to the key path, hence the
    # recursion depth is not decreased here.
    for list_entry in recur_item:
      yield from RecurseKey(list_entry, depth=depth, key_path=key_path)
    return

  if not hasattr(recur_item, 'items'):
    return

  for subkey, value in recur_item.items():
    yield key_path, subkey, value

    # Wrap a dictionary value in a list so both cases share the loop below.
    if isinstance(value, dict):
      value = [value]

    if isinstance(value, list):
      for entry in value:
        if isinstance(entry, dict):
          subkey_path = '{0:s}/{1:s}'.format(key_path, subkey)
          yield from RecurseKey(entry, depth=depth - 1, key_path=subkey_path)
def RecurseKey(recur_item, depth=15, key_path=''):
  """Flattens nested dictionaries and lists by yielding their values.

  The hierarchy of a plist file is a series of nested dictionaries and
  lists. This helper lets plugins navigate the structure without having to
  reimplement their own recursive methods.

  A depth limit, overridable by the caller, prevents processing extremely
  deeply nested plists. When the limit is reached a debug message is logged
  indicating which key processing stopped on.

  Args:
    recur_item: An object to be checked for additional nested items.
    depth: Optional integer indication the current recursion depth. This
        value is used to ensure we stop at the maximum recursion depth.
    key_path: Optional path of the current working key.

  Yields:
    A tuple of the key path, key, and value from a plist.
  """
  if depth < 1:
    logger.debug('Recursion limit hit for key: {0:s}'.format(key_path))

  elif isinstance(recur_item, (list, tuple)):
    # Lists and tuples do not add a level to the key path, hence the
    # recursion depth is not decreased here.
    for sub_item in recur_item:
      for key_value_tuple in RecurseKey(
          sub_item, depth=depth, key_path=key_path):
        yield key_value_tuple

  elif hasattr(recur_item, 'items'):
    for subkey, value in recur_item.items():
      yield key_path, subkey, value

      # Wrap a dictionary value in a list so both cases share the loop
      # below.
      if isinstance(value, dict):
        value = [value]

      if not isinstance(value, list):
        continue

      for list_entry in value:
        if isinstance(list_entry, dict):
          subkey_path = '{0:s}/{1:s}'.format(key_path, subkey)
          for key_value_tuple in RecurseKey(
              list_entry, depth=depth - 1, key_path=subkey_path):
            yield key_value_tuple
def Process(self, parser_mediator, plist_name, top_level, **kwargs):
  """Determine if this is the correct plugin; if so proceed with processing.

  Process() checks if the current plist being processed is a match for a
  plugin by comparing the PATH and KEY requirements defined by a plugin. If
  both match processing continues; else raise WrongPlistPlugin.

  This function also extracts the required keys as defined in
  self.PLIST_KEYS from the plist and stores the result in self.match[key]
  and calls self.GetEntries() which holds the processing logic implemented
  by the plugin.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    plist_name (str): name of the plist.
    top_level (dict[str, object]): plist top-level key.

  Raises:
    WrongPlistPlugin: If this plugin is not able to process the given file.
    ValueError: If top_level or plist_name are not set.
  """
  if plist_name is None or top_level is None:
    raise ValueError('Top level or plist name are not set.')

  # Plist names are compared case-insensitively.
  if plist_name.lower() != self.PLIST_PATH.lower():
    raise errors.WrongPlistPlugin(self.NAME, plist_name)

  if isinstance(top_level, dict):
    if not set(top_level.keys()).issuperset(self.PLIST_KEYS):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

  else:
    # Make sure we are getting back an object that has an iterator.
    if not hasattr(top_level, '__iter__'):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

    # This is a list and we need to just look at the first level
    # of keys there.
    keys = []
    for top_level_entry in top_level:
      if isinstance(top_level_entry, dict):
        keys.extend(top_level_entry.keys())

    # Compare this is a set, which removes possible duplicate entries
    # in the list.
    if not set(keys).issuperset(self.PLIST_KEYS):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

  # This will raise if unhandled keyword arguments are passed.
  super(PlistPlugin, self).Process(parser_mediator)

  logger.debug('Plist Plugin Used: {0:s} for: {1:s}'.format(
      self.NAME, plist_name))

  match = self._GetKeys(top_level, self.PLIST_KEYS)
  self.GetEntries(parser_mediator, top_level=top_level, match=match)
def Process(self, parser_mediator, plist_name, top_level, **kwargs):
  """Determine if this is the correct plugin; if so proceed with processing.

  Process() checks if the current plist being processed is a match for a
  plugin by comparing the PATH and KEY requirements defined by a plugin. If
  both match processing continues; else raise WrongPlistPlugin.

  This function also extracts the required keys as defined in
  self.PLIST_KEYS from the plist and stores the result in self.match[key]
  and calls self.GetEntries() which holds the processing logic implemented
  by the plugin.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    plist_name (str): name of the plist.
    top_level (dict[str, object]): plist top-level key.

  Raises:
    ValueError: If top_level or plist_name are not set.
    WrongPlistPlugin: If this plugin is not able to process the given file.
  """
  if plist_name is None or top_level is None:
    raise ValueError('Top level or plist name are not set.')

  # Plist names are compared case-insensitively.
  if plist_name.lower() != self.PLIST_PATH.lower():
    raise errors.WrongPlistPlugin(self.NAME, plist_name)

  # A dictionary top level must directly contain all of PLIST_KEYS.
  if isinstance(top_level, dict):
    if not set(top_level.keys()).issuperset(self.PLIST_KEYS):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

  else:
    # Make sure we are getting back an object that has an iterator.
    if not hasattr(top_level, '__iter__'):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

    # This is a list and we need to just look at the first level
    # of keys there.
    keys = []
    for top_level_entry in top_level:
      if isinstance(top_level_entry, dict):
        keys.extend(top_level_entry.keys())

    # Compare this is a set, which removes possible duplicate entries
    # in the list.
    if not set(keys).issuperset(self.PLIST_KEYS):
      raise errors.WrongPlistPlugin(self.NAME, plist_name)

  # This will raise if unhandled keyword arguments are passed.
  super(PlistPlugin, self).Process(parser_mediator)

  logger.debug('Plist Plugin Used: {0:s} for: {1:s}'.format(
      self.NAME, plist_name))

  match = self._GetKeys(top_level, self.PLIST_KEYS)
  self.GetEntries(parser_mediator, top_level=top_level, match=match)
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a compound ZIP file-like object.

  Each registered plugin is offered the ZIP file; plugins whose required
  paths are not present in the archive are skipped. Plugin failures are
  reported as extraction warnings and do not stop the remaining plugins.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  display_name = parser_mediator.GetDisplayName()

  if not zipfile.is_zipfile(file_object):
    raise errors.UnableToParseFile(
        '[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format(
            self.NAME, display_name, 'Not a Zip file.'))

  try:
    zip_file = zipfile.ZipFile(file_object, 'r', allowZip64=True)  # pylint: disable=consider-using-with

  # Some non-ZIP files return true for is_zipfile but will fail with another
  # error like a negative seek (IOError). Note that this function can raise
  # many different exceptions.
  except Exception as exception:  # pylint: disable=broad-except
    raise errors.UnableToParseFile(
        '[{0:s}] unable to parse file: {1:s} with error: {2!s}'.format(
            self.NAME, display_name, exception))

  for plugin in self._plugins:
    if parser_mediator.abort:
      break

    file_entry = parser_mediator.GetFileEntry()
    display_name = parser_mediator.GetDisplayName(file_entry)

    # Skip plugins whose required paths are not present in the archive.
    if not plugin.CheckRequiredPaths(zip_file):
      logger.debug(
          'Skipped parsing file: {0:s} with plugin: {1:s}'.format(
              display_name, plugin.NAME))
      continue

    logger.debug('Parsing file: {0:s} with plugin: {1:s}'.format(
        display_name, plugin.NAME))

    try:
      plugin.UpdateChainAndProcess(parser_mediator, zip_file=zip_file)
    except Exception as exception:  # pylint: disable=broad-except
      parser_mediator.ProduceExtractionWarning((
          'plugin: {0:s} unable to parse ZIP file: {1:s} with error: '
          '{2!s}').format(plugin.NAME, display_name, exception))

  zip_file.close()
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a bencoded file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  # Explicitly rewind before reading the signature; a previous parser may
  # have left the file offset past the start of the file.
  file_object.seek(0, os.SEEK_SET)

  header_data = file_object.read(2)
  if not self._BENCODE_RE.match(header_data):
    raise errors.UnableToParseFile('Not a valid Bencoded file.')

  file_object.seek(0, os.SEEK_SET)

  try:
    decoded_values = bencode.bread(file_object)
  except (IOError, bencode.BencodeDecodeError) as exception:
    display_name = parser_mediator.GetDisplayName()
    raise errors.UnableToParseFile(
        '[{0:s}] unable to parse file: {1:s} with error: {2!s}'.format(
            self.NAME, display_name, exception))

  if not decoded_values:
    parser_mediator.ProduceExtractionWarning('missing decoded Bencode values')
    return

  bencode_keys = set()
  for key in decoded_values.keys():
    if isinstance(key, bytes):
      # Work-around for issue in bencode 3.0.1 where keys are bytes.
      key = key.decode('utf-8')
    bencode_keys.add(key)

  for plugin in self._plugins:
    if parser_mediator.abort:
      break

    # Plugins declare which top-level keys they require; skip plugins whose
    # required keys are not all present.
    if not bencode_keys.issuperset(plugin.BENCODE_KEYS):
      continue

    logger.debug('Bencode plugin used: {0:s}'.format(plugin.NAME))

    try:
      plugin.UpdateChainAndProcess(
          parser_mediator, decoded_values=decoded_values)
    except Exception as exception:  # pylint: disable=broad-except
      parser_mediator.ProduceExtractionWarning((
          'plugin: {0:s} unable to parse Bencode file with error: '
          '{1!s}').format(plugin.NAME, exception))
def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
  """Parses a plist file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  filename = parser_mediator.GetFilename()
  file_size = file_object.get_size()

  if file_size <= 0:
    raise errors.UnableToParseFile(
        'File size: {0:d} bytes is less equal 0.'.format(file_size))

  # 50MB is 10x larger than any plist seen to date.
  if file_size > 50000000:
    raise errors.UnableToParseFile(
        'File size: {0:d} bytes is larger than 50 MB.'.format(file_size))

  # GetTopLevel raises UnableToParseFile itself; no need to catch and
  # re-raise it here.
  top_level_object = self.GetTopLevel(file_object)
  if not top_level_object:
    raise errors.UnableToParseFile(
        'Unable to parse: {0:s} skipping.'.format(filename))

  # TODO: add a parser filter.
  # Every plugin is tried; non-matching plugins signal by raising
  # WrongPlistPlugin.
  matching_plugin = None
  for plugin in self._plugins:
    try:
      plugin.UpdateChainAndProcess(
          parser_mediator, plist_name=filename, top_level=top_level_object)
      matching_plugin = plugin
    except errors.WrongPlistPlugin as exception:
      logger.debug('Wrong plugin: {0:s} for: {1:s}'.format(
          exception.args[0], exception.args[1]))

  if not matching_plugin and self._default_plugin:
    self._default_plugin.UpdateChainAndProcess(
        parser_mediator, plist_name=filename, top_level=top_level_object)
def _ParseHeader(self, parser_mediator, structure):
  """Parses a log header.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    structure (pyparsing.ParseResults): structure of tokens derived from
        a line of a text file.
  """
  time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
  if not time_elements_tuple:
    # Guard against a missing date_time token, which previously raised an
    # unhandled TypeError on unpacking.
    parser_mediator.ProduceExtractionWarning('missing date time value')
    return

  _, month, day, hours, minutes, seconds, year = time_elements_tuple

  # The month is parsed as a name; unknown names map to 0 which fails
  # date and time validation below.
  month = timelib.MONTH_DICT.get(month.lower(), 0)

  time_elements_tuple = (year, month, day, hours, minutes, seconds)

  try:
    date_time = dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
    # XChat logs local time without a time zone indication.
    date_time.is_local_time = True
  except ValueError:
    parser_mediator.ProduceExtractionWarning(
        'invalid date time value: {0!s}'.format(time_elements_tuple))
    return

  self._last_month = month

  event_data = XChatLogEventData()

  log_action = self._GetValueFromStructure(
      structure, 'log_action', default_value=[])
  if not log_action:
    # Guard against an empty log action, which previously raised an
    # unhandled IndexError below.
    parser_mediator.ProduceExtractionWarning('missing log action value')
    return

  if log_action[0] == 'BEGIN':
    self._xchat_year = year
    event_data.text = 'XChat start logging'

  elif log_action[0] == 'END':
    self._xchat_year = None
    event_data.text = 'XChat end logging'

  else:
    logger.debug('Unknown log action: {0:s}.'.format(' '.join(log_action)))
    return

  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_ADDED,
      time_zone=parser_mediator.timezone)
  parser_mediator.ProduceEventWithEventData(event, event_data)
def _GetFirefoxConfig(self, file_object, display_name): """Determine cache file block size. Args: file_object (dfvfs.FileIO): a file-like object. display_name (str): display name. Returns: firefox_cache_config: namedtuple containing the block size and first record offset. Raises: UnableToParseFile: if no valid cache record could be found. """ # There ought to be a valid record within the first 4 MiB. We use this # limit to prevent reading large invalid files. to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE) while file_object.get_offset() < to_read: offset = file_object.get_offset() try: cache_entry, _ = self._ReadCacheEntry(file_object, display_name, self._MINIMUM_BLOCK_SIZE) # We have not yet determined the block size, so we use the smallest # possible size. record_size = (self._CACHE_ENTRY_HEADER_SIZE + cache_entry.request_size + cache_entry.information_size) if record_size >= 4096: # _CACHE_003_ block_size = 4096 elif record_size >= 1024: # _CACHE_002_ block_size = 1024 else: # _CACHE_001_ block_size = 256 return self.FIREFOX_CACHE_CONFIG(block_size, offset) except IOError: logger.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format( self.NAME, display_name, offset)) raise errors.UnableToParseFile( 'Could not find a valid cache record. Not a Firefox cache file.')
def ParseFileObject(self, parser_mediator, file_object):
  """Parses an ESE database file-like object.

  Opens the database, runs every registered plugin whose required tables
  are present, and always closes the database afterwards.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): file-like object.
  """
  database = ESEDatabase()

  try:
    database.Open(file_object)
  except (IOError, ValueError) as exception:
    # Not being able to open the database is reported as a warning rather
    # than raised, so other parsers can still be tried.
    parser_mediator.ProduceExtractionWarning(
        'unable to open file with error: {0!s}'.format(exception))
    return

  # Compare the list of available plugin objects.
  # The cache is shared between plugins run against the same database.
  cache = ESEDBCache()
  try:
    for plugin in self._plugins:
      if parser_mediator.abort:
        break

      file_entry = parser_mediator.GetFileEntry()
      display_name = parser_mediator.GetDisplayName(file_entry)

      if not plugin.CheckRequiredTables(database):
        logger.debug(
            'Skipped parsing file: {0:s} with plugin: {1:s}'.format(
                display_name, plugin.NAME))
        continue

      logger.debug('Parsing file: {0:s} with plugin: {1:s}'.format(
          display_name, plugin.NAME))

      try:
        plugin.UpdateChainAndProcess(
            parser_mediator, cache=cache, database=database)
      except Exception as exception:  # pylint: disable=broad-except
        # A failing plugin should not prevent the remaining plugins from
        # running.
        parser_mediator.ProduceExtractionWarning((
            'plugin: {0:s} unable to parse ESE database with error: '
            '{1!s}').format(plugin.NAME, exception))

  finally:
    # TODO: explicitly clean up cache.
    database.Close()
def _GetFirefoxConfig(self, file_object, display_name):
  """Determine cache file block size.

  Args:
    file_object (dfvfs.FileIO): a file-like object.
    display_name (str): display name.

  Returns:
    firefox_cache_config: namedtuple containing the block size and first
        record offset.

  Raises:
    UnableToParseFile: if no valid cache record could be found.
  """
  # Limit the scan to the first 4 MiB: a valid record ought to appear
  # within that range and this prevents reading large invalid files.
  scan_limit = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

  while file_object.get_offset() < scan_limit:
    record_offset = file_object.get_offset()
    try:
      cache_entry, _ = self._ReadCacheEntry(
          file_object, display_name, self._MINIMUM_BLOCK_SIZE)

      # The block size is not known yet, so the smallest possible size was
      # used to read the entry above.
      record_size = (
          self._CACHE_ENTRY_HEADER_SIZE + cache_entry.request_size +
          cache_entry.information_size)

      # Derive the block size from the record size: 4096 for _CACHE_003_,
      # 1024 for _CACHE_002_ and 256 for _CACHE_001_ files.
      if record_size >= 4096:
        block_size = 4096
      elif record_size >= 1024:
        block_size = 1024
      else:
        block_size = 256

      return self.FIREFOX_CACHE_CONFIG(block_size, record_offset)

    except IOError:
      logger.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
          self.NAME, display_name, record_offset))

  raise errors.UnableToParseFile(
      'Could not find a valid cache record. Not a Firefox cache file.')
def _ParseHeader(self, parser_mediator, structure):
  """Parses a log header.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    structure (pyparsing.ParseResults): structure of tokens derived from
        a line of a text file.
  """
  time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
  if not time_elements_tuple:
    # Guard against a missing date_time token, which previously raised an
    # unhandled TypeError on unpacking.
    parser_mediator.ProduceExtractionWarning('missing date time value')
    return

  _, month, day, hours, minutes, seconds, year = time_elements_tuple

  # The month is parsed as a name; unknown names map to 0 which fails
  # date and time validation below.
  month = timelib.MONTH_DICT.get(month.lower(), 0)

  time_elements_tuple = (year, month, day, hours, minutes, seconds)

  try:
    date_time = dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
    # XChat logs local time without a time zone indication.
    date_time.is_local_time = True
  except ValueError:
    parser_mediator.ProduceExtractionWarning(
        'invalid date time value: {0!s}'.format(time_elements_tuple))
    return

  self._last_month = month

  event_data = XChatLogEventData()

  log_action = self._GetValueFromStructure(
      structure, 'log_action', default_value=[])
  if not log_action:
    # Guard against an empty log action, which previously raised an
    # unhandled IndexError below.
    parser_mediator.ProduceExtractionWarning('missing log action value')
    return

  if log_action[0] == 'BEGIN':
    self._xchat_year = year
    event_data.text = 'XChat start logging'

  elif log_action[0] == 'END':
    self._xchat_year = None
    event_data.text = 'XChat end logging'

  else:
    logger.debug('Unknown log action: {0:s}.'.format(' '.join(log_action)))
    return

  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_ADDED,
      time_zone=parser_mediator.timezone)
  parser_mediator.ProduceEventWithEventData(event, event_data)
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure and produces events.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): structure parsed from the log file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key not in self._SUPPORTED_KEYS:
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  # TODO: Add anomaly objects for abnormal timestamps, such as when the log
  # timestamp is greater than the session start.
  if key == 'logline':
    self._ParseLogLine(parser_mediator, structure)
    return

  # Header and footer records produce session start/end events.
  timestamp = self._GetValueFromStructure(structure, 'timestamp')
  if timestamp is None:
    logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
        self.NAME, key))
    return

  session = self._GetValueFromStructure(structure, 'session')

  event_data = PopularityContestSessionEventData()
  # TODO: determine why session is formatted as a string.
  event_data.session = '{0!s}'.format(session)

  if key == 'header':
    event_data.details = self._GetValueFromStructure(structure, 'details')
    event_data.hostid = self._GetValueFromStructure(structure, 'id')
    event_data.status = 'start'
  elif key == 'footer':
    event_data.status = 'end'

  date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_ADDED)
  parser_mediator.ProduceEventWithEventData(event, event_data)
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a Mac Wifi log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  # Reset parser state before verification; the estimated year is needed
  # because the log lines do not carry a year.
  self._last_month = 0
  self._year_use = parser_mediator.GetEstimatedYear()

  # Try the regular header first, then the log rotation ("turned over")
  # header.
  key = 'header'
  try:
    structure = self._MAC_WIFI_HEADER.parseString(line)
  except pyparsing.ParseException:
    structure = None

  if not structure:
    key = 'turned_over_header'
    try:
      structure = self._MAC_WIFI_TURNED_OVER_HEADER.parseString(line)
    except pyparsing.ParseException:
      structure = None

  if not structure:
    logger.debug('Not a Mac Wifi log file')
    return False

  time_elements_tuple = self._GetTimeElementsTuple(key, structure)

  # Only validate the date and time; no event is produced during
  # verification.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug(
        'Not a Mac Wifi log file, invalid date and time: {0!s}'.format(
            structure.date_time))
    return False

  self._last_month = time_elements_tuple[1]

  return True
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a plist file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  filename = parser_mediator.GetFilename()
  file_size = file_object.get_size()

  if file_size <= 0:
    raise errors.UnableToParseFile(
        'File size: {0:d} bytes is less equal 0.'.format(file_size))

  # 50MB is 10x larger than any plist seen to date.
  if file_size > 50000000:
    raise errors.UnableToParseFile(
        'File size: {0:d} bytes is larger than 50 MB.'.format(file_size))

  # GetTopLevel raises UnableToParseFile itself; no need to catch and
  # re-raise it here.
  top_level_object = self.GetTopLevel(file_object)
  if not top_level_object:
    raise errors.UnableToParseFile(
        'Unable to parse: {0:s} skipping.'.format(filename))

  # TODO: add a parser filter.
  # Every plugin is tried; non-matching plugins signal by raising
  # WrongPlistPlugin.
  matching_plugin = None
  for plugin in self._plugins:
    try:
      plugin.UpdateChainAndProcess(
          parser_mediator, plist_name=filename, top_level=top_level_object)
      matching_plugin = plugin
    except errors.WrongPlistPlugin as exception:
      logger.debug('Wrong plugin: {0:s} for: {1:s}'.format(
          exception.args[0], exception.args[1]))

  if not matching_plugin and self._default_plugin:
    self._default_plugin.UpdateChainAndProcess(
        parser_mediator, plist_name=filename, top_level=top_level_object)
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a Mac Wifi log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  # Reset parser state; log lines carry no year so an estimate is needed.
  self._last_month = 0
  self._year_use = parser_mediator.GetEstimatedYear()

  # Attempt the regular header first, then the log rotation ("turned
  # over") header.
  structure = None
  key = 'header'
  try:
    structure = self._MAC_WIFI_HEADER.parseString(line)
  except pyparsing.ParseException:
    structure = None

  if not structure:
    key = 'turned_over_header'
    try:
      structure = self._MAC_WIFI_TURNED_OVER_HEADER.parseString(line)
    except pyparsing.ParseException:
      structure = None

  if not structure:
    logger.debug('Not a Mac Wifi log file')
    return False

  time_elements_tuple = self._GetTimeElementsTuple(key, structure)

  # Validate the date and time without producing an event.
  try:
    dfdatetime_time_elements.TimeElementsInMilliseconds(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug(
        'Not a Mac Wifi log file, invalid date and time: {0!s}'.format(
            time_elements_tuple))
    return False

  self._last_month = time_elements_tuple[1]
  return True
def _ProcessZipFileWithPlugins(self, parser_mediator, zip_file):
  """Processes a zip file using all compound zip files.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    zip_file (zipfile.ZipFile): the zip file. It should not be closed in
        this method, but will be closed in ParseFileObject().
  """
  # Determine the member paths once, so each plugin can check its
  # required paths without rescanning the archive.
  member_paths = zip_file.namelist()

  for plugin in self._plugins:
    try:
      plugin.UpdateChainAndProcess(
          parser_mediator, zip_file=zip_file, archive_members=member_paths)
    except errors.WrongCompoundZIPPlugin as exception:
      # The plugin determined the archive is not its format; try the next
      # plugin.
      logger.debug('[{0:s}] wrong plugin: {1!s}'.format(
          self.NAME, exception))
def ParseString(self, string):
  """Parses a string.

  Args:
    string (str): string to parse.

  Returns:
    pyparsing.ParseResults: parsed tokens or None if the string could not
        be parsed.
  """
  try:
    return self.expression.parseString(string)
  except pyparsing.ParseException as exception:
    logger.debug(
        'Unable to parse string with error: {0!s}'.format(exception))

  return None
def _RecurseKey(self, recur_item, root='', depth=15): """Flattens nested dictionaries and lists by yielding their values. The hierarchy of a bencode file is a series of nested dictionaries and lists. This is a helper function helps plugins navigate the structure without having to reimplement their own recursive methods. This method implements an overridable depth limit to prevent processing extremely deeply nested dictionaries. If the limit is reached a debug message is logged indicating which key processing stopped on. Args: recur_item (object): object to be checked for additional nested items. root (str): the pathname of the current working key. depth (int): a counter to ensure we stop at the maximum recursion depth. Yields: tuple: containing: str: root str: key str: value """ if depth < 1: logger.debug('Recursion limit hit for key: {0:s}'.format(root)) return if isinstance(recur_item, (list, tuple)): for recur in recur_item: for key in self._RecurseKey(recur, root, depth): yield key return if not hasattr(recur_item, 'iteritems'): return for key, value in iter(recur_item.items()): yield root, key, value if isinstance(value, dict): value = [value] if isinstance(value, list): for item in value: if isinstance(item, dict): for keyval in self._RecurseKey(item, root=root + '/' + key, depth=depth - 1): yield keyval
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure and produces events.

  Log lines are delegated to _ParseLogLine; header and footer records
  produce session start/end events.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): structure parsed from the log file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key not in self._SUPPORTED_KEYS:
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  # TODO: Add anomaly objects for abnormal timestamps, such as when the log
  # timestamp is greater than the session start.
  if key == 'logline':
    self._ParseLogLine(parser_mediator, structure)
  else:
    timestamp = self._GetValueFromStructure(structure, 'timestamp')
    if timestamp is None:
      logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
          self.NAME, key))
      return

    session = self._GetValueFromStructure(structure, 'session')

    event_data = PopularityContestSessionEventData()
    # TODO: determine why session is formatted as a string.
    event_data.session = '{0!s}'.format(session)

    if key == 'header':
      event_data.details = self._GetValueFromStructure(structure, 'details')
      event_data.hostid = self._GetValueFromStructure(structure, 'id')
      event_data.status = 'start'

    elif key == 'footer':
      event_data.status = 'end'

    date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)
def _RecurseKey(self, recur_item, root='', depth=15): """Flattens nested dictionaries and lists by yielding their values. The hierarchy of a bencode file is a series of nested dictionaries and lists. This is a helper function helps plugins navigate the structure without having to reimplement their own recursive methods. This method implements an overridable depth limit to prevent processing extremely deeply nested dictionaries. If the limit is reached a debug message is logged indicating which key processing stopped on. Args: recur_item (object): object to be checked for additional nested items. root (str): the pathname of the current working key. depth (int): a counter to ensure we stop at the maximum recursion depth. Yields: tuple: containing: str: root str: key str: value """ if depth < 1: logger.debug('Recursion limit hit for key: {0:s}'.format(root)) return if isinstance(recur_item, (list, tuple)): for recur in recur_item: for key in self._RecurseKey(recur, root, depth): yield key return if not hasattr(recur_item, 'iteritems'): return for key, value in iter(recur_item.items()): yield root, key, value if isinstance(value, dict): value = [value] if isinstance(value, list): for item in value: if isinstance(item, dict): for keyval in self._RecurseKey( item, root=root + '/' + key, depth=depth - 1): yield keyval
def _ParseMRUListEntryValue(self, parser_mediator, registry_key, entry_index,
                            entry_letter, **kwargs):
  """Parses the MRUList entry value.

  String values are used directly; binary values are decoded as an
  UTF-16 little-endian string.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key that contains
        the MRUList value.
    entry_index (int): MRUList entry index.
    entry_letter (str): character value representing the entry.

  Returns:
    str: MRUList entry value. An empty string when the value is missing or
        cannot be parsed.
  """
  value_string = ''

  value = registry_key.GetValueByName('{0:s}'.format(entry_letter))
  if value is None:
    parser_mediator.ProduceExtractionWarning(
        'missing MRUList value: {0:s} in key: {1:s}.'.format(
            entry_letter, registry_key.path))

  elif value.DataIsString():
    value_string = value.GetDataAsObject()

  elif value.DataIsBinaryData():
    logger.debug((
        '[{0:s}] Non-string MRUList entry value: {1:s} parsed as string '
        'in key: {2:s}.').format(self.NAME, entry_letter, registry_key.path))

    utf16le_string_map = self._GetDataTypeMap('utf16le_string')

    try:
      value_string = self._ReadStructureFromByteStream(
          value.data, 0, utf16le_string_map)
    except (ValueError, errors.ParseError) as exception:
      parser_mediator.ProduceExtractionWarning(
          ('unable to parse MRUList entry value: {0:s} with error: '
           '{1!s}').format(entry_letter, exception))

    # Strip the end-of-string character.
    value_string = value_string.rstrip('\x00')

  return value_string
def _ParseMRUListEntryValue(self, parser_mediator, registry_key, entry_index,
                            entry_letter, codepage='cp1252', **kwargs):
  """Parses the MRUList entry value.

  The entry value is expected to contain binary shell item data, which is
  parsed into a path string.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key that contains
        the MRUList value.
    entry_index (int): MRUList entry index.
    entry_letter (str): character value representing the entry.
    codepage (Optional[str]): extended ASCII string codepage.

  Returns:
    str: MRUList entry value. An empty string when the value is missing,
        non-binary or empty.
  """
  value = registry_key.GetValueByName('{0:s}'.format(entry_letter))
  if value is None:
    logger.debug(
        '[{0:s}] Missing MRUList entry value: {1:s} in key: {2:s}.'.format(
            self.NAME, entry_letter, registry_key.path))
    return ''

  if not value.DataIsBinaryData():
    logger.debug(
        ('[{0:s}] Non-binary MRUList entry value: {1:s} in key: '
         '{2:s}.').format(self.NAME, entry_letter, registry_key.path))
    return ''

  if not value.data:
    return ''

  shell_items_parser = shell_items.ShellItemsParser(registry_key.path)
  shell_items_parser.ParseByteStream(
      parser_mediator, value.data, codepage=codepage)

  return 'Shell item path: {0:s}'.format(shell_items_parser.CopyToPath())
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a Popularity Contest log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line was successfully parsed.
  """
  # A valid file starts with a parsable header line; the tokens themselves
  # are not needed here.
  try:
    self.HEADER.parseString(line)
    return True
  except pyparsing.ParseException:
    logger.debug('Not a Popularity Contest log file, invalid header')
    return False
def _ParseMRUListEntryValue(
    self, parser_mediator, registry_key, entry_index, entry_letter, **kwargs):
  """Parses the MRUList entry value.

  String values are used directly; binary values are decoded as an
  UTF-16 little-endian string.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key that contains
        the MRUList value.
    entry_index (int): MRUList entry index.
    entry_letter (str): character value representing the entry.

  Returns:
    str: MRUList entry value. An empty string when the value is missing or
        cannot be parsed.
  """
  value_string = ''

  value = registry_key.GetValueByName('{0:s}'.format(entry_letter))
  if value is None:
    # NOTE(review): uses ProduceExtractionError where a sibling
    # implementation uses ProduceExtractionWarning — confirm which mediator
    # API this file targets.
    parser_mediator.ProduceExtractionError(
        'missing MRUList value: {0:s} in key: {1:s}.'.format(
            entry_letter, registry_key.path))

  elif value.DataIsString():
    value_string = value.GetDataAsObject()

  elif value.DataIsBinaryData():
    logger.debug((
        '[{0:s}] Non-string MRUList entry value: {1:s} parsed as string '
        'in key: {2:s}.').format(self.NAME, entry_letter, registry_key.path))

    utf16le_string_map = self._GetDataTypeMap('utf16le_string')

    try:
      value_string = self._ReadStructureFromByteStream(
          value.data, 0, utf16le_string_map)
    except (ValueError, errors.ParseError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse MRUList entry value: {0:s} with error: '
          '{1!s}').format(entry_letter, exception))

    # Strip the end-of-string character.
    value_string = value_string.rstrip('\x00')

  return value_string
def VerifyStructure(self, parser_mediator, line):
  """Verifies if a line from a text file is in the expected format.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._DPKG_LOG_LINE.parseString(line)
  except pyparsing.ParseException as exception:
    logger.debug(
        'Unable to parse Debian dpkg.log file with error: {0!s}'.format(
            exception))
    return False

  # Both the date and time and the message body must have been matched.
  has_date_time = 'date_time' in structure
  has_body = 'body' in structure
  return has_date_time and has_body
def Process(self, parser_mediator, plist_name, top_level, **kwargs):
  """Overwrite the default Process function so it always triggers.

  Process() checks if the current plist being processed is a match for a
  plugin by comparing the PATH and KEY requirements defined by a plugin. If
  both match processing continues; else raise WrongPlistPlugin.

  The purpose of the default plugin is to always trigger on any given plist
  file, thus it needs to overwrite the default behavior of comparing PATH
  and KEY.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    plist_name (str): name of the plist.
    top_level (dict[str, object]): plist top-level key.
  """
  # Skip the PATH/KEY matching of the base class entirely and go straight
  # to extraction.
  logger.debug('Plist {0:s} plugin used for: {1:s}'.format(
      self.NAME, plist_name))

  self.GetEntries(parser_mediator, top_level=top_level, **kwargs)
def VerifyStructure(self, parser_mediator, line):
  """Verifies if a line from a text file is in the expected format.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._SELINUX_LOG_LINE.parseString(line)
  except pyparsing.ParseException as exception:
    logger.debug(
        'Unable to parse SELinux audit.log file with error: {0!s}'.format(
            exception))
    return False

  # A valid audit record carries both a type and a message token.
  has_type = 'type' in structure
  has_msg = 'msg' in structure
  return has_type and has_msg
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a Mac AppFirewall log file.

  Verification checks that the line parses, that it is the known first
  line of the log ("creating /var/log/appfirewall.log" with status
  "Error") and that its date and time are valid.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  # Reset parser state; log lines carry no year so an estimate is needed.
  self._last_month = 0
  self._year_use = parser_mediator.GetEstimatedYear()

  try:
    structure = self.FIREWALL_LINE.parseString(line)
  except pyparsing.ParseException as exception:
    logger.debug((
        'Unable to parse file as a Mac AppFirewall log file with error: '
        '{0!s}').format(exception))
    return False

  action = self._GetValueFromStructure(structure, 'action')
  if action != 'creating /var/log/appfirewall.log':
    logger.debug(
        'Not a Mac AppFirewall log file, invalid action: {0!s}'.format(
            action))
    return False

  status = self._GetValueFromStructure(structure, 'status')
  if status != 'Error':
    logger.debug(
        'Not a Mac AppFirewall log file, invalid status: {0!s}'.format(
            status))
    return False

  time_elements_tuple = self._GetTimeElementsTuple(structure)

  # Only validate the date and time; no event is produced during
  # verification.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug((
        'Not a Mac AppFirewall log file, invalid date and time: '
        '{0!s}').format(time_elements_tuple))
    return False

  self._last_month = time_elements_tuple[1]

  return True
def VerifyStructure(self, parser_mediator, line):
  """Verify that this file is a Sophos Anti-Virus log file.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfVFS.
    line (str): line from a text file.

  Returns:
    bool: True if the line is in the expected format, False if not.
  """
  try:
    structure = self._LOG_LINE.parseString(line)
  except pyparsing.ParseException:
    logger.debug('Not a Sophos Anti-Virus log file')
    return False

  # Expect spaces at position 9 and 16.
  # Fix: the previous test `' ' not in (line[8], line[15])` only rejected
  # lines where neither position was a space; require both positions to be
  # a space as the comment documents.
  if len(line) < 16 or line[8] != ' ' or line[15] != ' ':
    logger.debug('Not a Sophos Anti-Virus log file')
    return False

  time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')

  # Only validate the date and time; no event is produced during
  # verification.
  try:
    dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
  except ValueError:
    logger.debug((
        'Not a Sophos Anti-Virus log file, invalid date and time: '
        '{0!s}').format(time_elements_tuple))
    return False

  return True
def _ParseMRUListExEntryValue(
    self, parser_mediator, registry_key, entry_index, entry_number,
    codepage='cp1252', **kwargs):
  """Parses the MRUListEx entry value.

  The entry value is expected to contain binary shell item data, which is
  parsed into a path string.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    registry_key (dfwinreg.WinRegistryKey): Windows Registry key that contains
        the MRUListEx value.
    entry_index (int): MRUListEx entry index.
    entry_number (int): entry number.
    codepage (Optional[str]): extended ASCII string codepage.

  Returns:
    str: MRUList entry value. An empty string when the value is missing,
        non-binary or empty.
  """
  value = registry_key.GetValueByName('{0:d}'.format(entry_number))
  if value is None:
    parser_mediator.ProduceExtractionWarning(
        'missing MRUListEx value: {0:d} in key: {1:s}.'.format(
            entry_number, registry_key.path))
    return ''

  if not value.DataIsBinaryData():
    logger.debug((
        '[{0:s}] Non-binary MRUListEx entry value: {1:d} in key: '
        '{2:s}.').format(self.NAME, entry_number, registry_key.path))
    return ''

  if not value.data:
    return ''

  shell_items_parser = shell_items.ShellItemsParser(registry_key.path)
  shell_items_parser.ParseByteStream(
      parser_mediator, value.data, codepage=codepage)

  return 'Shell item path: {0:s}'.format(shell_items_parser.CopyToPath())
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): structure parsed from the log file.
  """
  if key != 'logline':
    logger.warning(
        'Unable to parse record, unknown structure: {0:s}'.format(key))
    return

  # The timestamp token is a decimal string of POSIX seconds.
  timestamp = self._GetValueFromStructure(structure, 'timestamp')
  try:
    timestamp = int(timestamp, 10)
  except (TypeError, ValueError):
    logger.debug('Invalid timestamp {0!s}, skipping record'.format(timestamp))
    return

  try:
    text = self._GetValueFromStructure(structure, 'text', default_value='')
    nickname, text = self._StripThenGetNicknameAndText(text)
  except pyparsing.ParseException:
    logger.debug('Error parsing entry at offset {0:d}'.format(self._offset))
    return

  event_data = XChatScrollbackEventData()
  event_data.nickname = nickname
  event_data.offset = self._offset
  event_data.text = text

  date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_ADDED)
  parser_mediator.ProduceEventWithEventData(event, event_data)
def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
  """Parse an isolated header line and store appropriate attributes.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    structure (pyparsing.ParseResults): structure of tokens derived from
        a line of a text file.
  """
  # An isolated line can only be attributed to the event that preceded it.
  if not self._last_event_data:
    logger.debug('SkyDrive, found isolated line with no previous events')
    return

  event_data = SkyDriveOldLogEventData()
  event_data.offset = self._last_event_data.offset
  event_data.text = self._GetValueFromStructure(structure, 'text')

  event = time_events.DateTimeValuesEvent(
      self._last_date_time, definitions.TIME_DESCRIPTION_ADDED)
  parser_mediator.ProduceEventWithEventData(event, event_data)

  # TODO think to a possible refactoring for the non-header lines.
  # Clear the carried-over state so the next isolated line is not
  # attributed to this event.
  self._last_date_time = None
  self._last_event_data = None
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a bencoded file-like object.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    file_object (dfvfs.FileIO): a file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  file_object.seek(0, os.SEEK_SET)
  header = file_object.read(2)

  if not self.BENCODE_RE.match(header):
    raise errors.UnableToParseFile('Not a valid Bencoded file.')

  file_object.seek(0, os.SEEK_SET)
  try:
    # NOTE(review): bdecode and BTFailure belong to the older bencode 1.x/2.x
    # API; a sibling implementation uses bread/BencodeDecodeError from
    # bencode 3.x — confirm which bencode version this module pins.
    data_object = bencode.bdecode(file_object.read())

  except (IOError, bencode.BTFailure) as exception:
    raise errors.UnableToParseFile(
        '[{0:s}] unable to parse file: {1:s} with error: {2!s}'.format(
            self.NAME, parser_mediator.GetDisplayName(), exception))

  if not data_object:
    raise errors.UnableToParseFile(
        '[{0:s}] missing decoded data for file: {1:s}'.format(
            self.NAME, parser_mediator.GetDisplayName()))

  # Every plugin is tried; non-matching plugins signal by raising
  # WrongBencodePlugin.
  for plugin in self._plugins:
    try:
      plugin.UpdateChainAndProcess(parser_mediator, data=data_object)

    except errors.WrongBencodePlugin as exception:
      logger.debug('[{0:s}] wrong plugin: {1!s}'.format(
          self.NAME, exception))