def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
    """Creates find specifications for the named artifact definitions.

    The find specifications of all known definitions are gathered first and
    are then stored, by type, on either included_file_system_find_specs or
    registry_find_specs. Names without a definition are logged and skipped.

    Args:
      artifact_filter_names (list[str]): names of artifact definitions that
          are used for filtering file system and Windows Registry key paths.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    collected_specs = []
    for definition_name in artifact_filter_names:
        artifact_definition = self._artifacts_registry.GetDefinitionByName(
            definition_name)
        if artifact_definition:
            logger.debug(
                'building find spec from artifact definition: {0:s}'.format(
                    definition_name))
            collected_specs.extend(self._BuildFindSpecsFromArtifact(
                artifact_definition, environment_variables))
        else:
            logger.debug(
                'undefined artifact definition: {0:s}'.format(definition_name))

    # Sort the gathered find specifications into their respective lists.
    for spec in collected_specs:
        if isinstance(spec, file_system_searcher.FindSpec):
            self.included_file_system_find_specs.append(spec)
            continue

        if isinstance(spec, registry_searcher.FindSpec):
            self.registry_find_specs.append(spec)
            continue

        logger.warning(
            'Unsupported find specification type: {0!s}'.format(type(spec)))
def _BuildFindSpecsFromArtifact(self, definition, environment_variables):
    """Creates the find specifications for every source of one artifact.

    File sources are handled per path, Windows Registry key sources per key
    path and artifact group sources per member name. Windows Registry value
    sources are reduced to their key paths. Any other source type is reported
    with a warning and skipped.

    Args:
      definition (artifacts.ArtifactDefinition): artifact definition.
      environment_variables (list[EnvironmentVariableArtifact]):
          environment variables.

    Returns:
      list[dfvfs.FindSpec|dfwinreg.FindSpec]: dfVFS or dfWinReg find
          specifications.
    """
    collected = []

    def _ExtendWithRegistryKeys(registry_key_paths):
        # Key paths rejected by CheckKeyCompatibility are skipped.
        for registry_key_path in registry_key_paths:
            if self.CheckKeyCompatibility(registry_key_path):
                collected.extend(
                    self._BuildFindSpecsFromRegistrySourceKey(
                        registry_key_path))

    for source in definition.sources:
        source_type = source.type_indicator

        if source_type == artifact_types.TYPE_INDICATOR_FILE:
            for source_path in set(source.paths):
                collected.extend(self._BuildFindSpecsFromFileSourcePath(
                    source_path, source.separator, environment_variables,
                    self._knowledge_base.user_accounts))
            continue

        if source_type == artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY:
            _ExtendWithRegistryKeys(set(source.keys))
            continue

        if (source_type ==
                artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_VALUE):
            # TODO: Handle Registry Values Once Supported in dfwinreg.
            # https://github.com/log2timeline/dfwinreg/issues/98

            # Deduplicate the key paths of the key and value pairs.
            unique_key_paths = set(
                pair['key'] for pair in source.key_value_pairs)
            logger.warning(
                'Windows Registry values are not supported, extracting keys: '
                '"{0!s}"'.format(', '.join(unique_key_paths)))
            _ExtendWithRegistryKeys(unique_key_paths)
            continue

        if source_type == artifact_types.TYPE_INDICATOR_ARTIFACT_GROUP:
            for group_member_name in source.names:
                collected.extend(self._BuildFindSpecsFromGroupName(
                    group_member_name, environment_variables))
            continue

        logger.warning(
            'Unsupported artifact definition source type: "{0:s}"'.format(
                source_type))

    return collected
def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
    """Builds find specifications from artifact definitions.

    The find specifications built for every known artifact definition are
    appended to included_file_system_find_specs or registry_find_specs,
    depending on their type. Names without a definition are logged and
    skipped, find specifications of another type are logged with a warning.

    Args:
      artifact_filter_names (list[str]): names of artifact definitions that
          are used for filtering file system and Windows Registry key paths.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    find_specs = []
    for name in artifact_filter_names:
        definition = self._artifacts_registry.GetDefinitionByName(name)
        if not definition:
            logger.debug('undefined artifact definition: {0:s}'.format(name))
            continue

        logger.debug(
            'building find spec from artifact definition: {0:s}'.format(name))
        artifact_find_specs = self._BuildFindSpecsFromArtifact(
            definition, environment_variables)
        find_specs.extend(artifact_find_specs)

    for find_spec in find_specs:
        if isinstance(find_spec, file_system_searcher.FindSpec):
            self.included_file_system_find_specs.append(find_spec)

        elif isinstance(find_spec, registry_searcher.FindSpec):
            self.registry_find_specs.append(find_spec)

        else:
            # A type object does not accept the "s" format specification and
            # raises TypeError, hence the explicit "!s" string conversion.
            logger.warning(
                'Unsupported find specification type: {0!s}'.format(
                    type(find_spec)))
def ProcessPathSpec(self, mediator, path_spec):
    """Opens the file entry of a path specification and processes it.

    The processing status is set to running for the duration of the call and
    back to idle afterwards. A directory is passed to _ProcessDirectory before
    the file entry itself is passed to _ProcessFileEntry.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
    """
    self.last_activity_timestamp = time.time()
    self.processing_status = definitions.PROCESSING_STATUS_RUNNING

    opened_entry = path_spec_resolver.Resolver.OpenFileEntry(
        path_spec, resolver_context=mediator.resolver_context)

    if opened_entry is not None:
        mediator.SetFileEntry(opened_entry)
        try:
            if opened_entry.IsDirectory():
                self._ProcessDirectory(mediator, opened_entry)
            self._ProcessFileEntry(mediator, opened_entry)

        finally:
            mediator.ResetFileEntry()

            self.last_activity_timestamp = time.time()
            self.processing_status = definitions.PROCESSING_STATUS_IDLE

    else:
        # Nothing to process, report the path specification and go idle.
        message = 'Unable to open file entry with path spec: {0:s}'.format(
            mediator.GetDisplayNameForPathSpec(path_spec))
        logger.warning(message)
        self.processing_status = definitions.PROCESSING_STATUS_IDLE
def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Hands a path specification to the extraction worker.

    No exception raised by the extraction worker leaves this method: a
    keyboard interrupt and a cache full error set the abort flag, any other
    exception is turned into an extraction warning.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
        path_spec)

    self._CacheFileSystem(path_spec)

    filters_helper = self.collection_filters_helper
    if filters_helper:
        excluded_find_specs = filters_helper.excluded_file_system_find_specs
    else:
        excluded_find_specs = None

    try:
        extraction_worker.ProcessPathSpec(
            parser_mediator, path_spec,
            excluded_find_specs=excluded_find_specs)

    except KeyboardInterrupt:
        self._abort = True

        self._processing_status.aborted = True
        if self._status_update_callback:
            self._status_update_callback(self._processing_status)

    # We cannot recover from a CacheFullError and abort processing when
    # it is raised.
    except dfvfs_errors.CacheFullError:
        # TODO: signal engine of failure.
        self._abort = True
        abort_message = (
            'ABORT: detected cache full error while processing '
            'path spec: {0:s}').format(self._current_display_name)
        logger.error(abort_message)

    # All exceptions need to be caught here to prevent the worker
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
        warning_message = (
            'unable to process path specification with error: '
            '{0!s}').format(exception)
        parser_mediator.ProduceExtractionWarning(
            warning_message, path_spec=path_spec)

        debug_output = getattr(
            self._processing_configuration, 'debug_output', False)
        if debug_output:
            # The status update thread is stopped while the debugger runs.
            self._StopStatusUpdateThread()

            logger.warning(
                'Unhandled exception while processing path spec: '
                '{0:s}.'.format(self._current_display_name))
            logger.exception(exception)

            pdb.post_mortem()

            self._StartStatusUpdateThread()
def _ParseFileEntryWithParser(self, parser_mediator, parser, file_entry, file_object=None):
    """Runs a single parser on a file entry and reports the outcome.

    Timing of the parser is sampled around the parse call and memory usage is
    sampled afterwards, also when parsing fails.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser (BaseParser): parser.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      int: parse result which is _PARSE_RESULT_FAILURE if the file entry
          could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
          successfully was parsed or _PARSE_RESULT_UNSUPPORTED when
          UnableToParseFile was raised.

    Raises:
      TypeError: if parser object is not a supported parser type.
    """
    supported_parser_types = (
        parsers_interface.FileEntryParser,
        parsers_interface.FileObjectParser)
    if not isinstance(parser, supported_parser_types):
        raise TypeError('Unsupported parser object type.')

    failure_template = '{0:s} unable to parse file: {1:s} with error: {2!s}'

    parser_mediator.ClearParserChain()
    parser_mediator.SampleStartTiming(parser.NAME)

    try:
        if isinstance(parser, parsers_interface.FileEntryParser):
            parser.Parse(parser_mediator)
        else:
            # The type check above leaves FileObjectParser as the only
            # remaining possibility.
            parser.Parse(parser_mediator, file_object)
        parse_result = self._PARSE_RESULT_SUCCESS

    # We catch IOError so we can determine the parser that generated the
    # error.
    except (IOError, dfvfs_errors.BackEndError) as exception:
        logger.warning(failure_template.format(
            parser.NAME, parser_mediator.GetDisplayName(file_entry),
            exception))
        parse_result = self._PARSE_RESULT_FAILURE

    except errors.UnableToParseFile as exception:
        logger.debug(failure_template.format(
            parser.NAME, parser_mediator.GetDisplayName(file_entry),
            exception))
        parse_result = self._PARSE_RESULT_UNSUPPORTED

    finally:
        parser_mediator.SampleStopTiming(parser.NAME)
        parser_mediator.SampleMemoryUsage(parser.NAME)

    return parse_result
def _ProcessFileEntry(self, mediator, file_entry):
    """Processes the data streams, or the metadata, of a file entry.

    A metadata file is passed to _ProcessMetadataFile. Otherwise every data
    stream that cannot be skipped is processed, and when none was processed
    the file entry is processed once without a data stream. Afterwards the
    file object of the file entry is force removed from the resolver context
    if its reference count changed during processing.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry.
    """
    display_name = mediator.GetDisplayName()
    logger.debug(
        '[ProcessFileEntry] processing file entry: {0:s}'.format(
            display_name))

    initial_reference_count = (
        mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec))

    try:
        if self._IsMetadataFile(file_entry):
            self._ProcessMetadataFile(mediator, file_entry)

        else:
            number_of_processed_streams = 0
            for data_stream in file_entry.data_streams:
                if self._abort:
                    break

                if not self._CanSkipDataStream(file_entry, data_stream):
                    self._ProcessFileEntryDataStream(
                        mediator, file_entry, data_stream)
                    number_of_processed_streams += 1
                    continue

                logger.debug(
                    '[ProcessFileEntry] Skipping datastream {0:s} '
                    'for {1:s}: {2:s}'.format(
                        data_stream.name, file_entry.type, display_name))

            if number_of_processed_streams == 0:
                # For when the file entry does not contain a data stream.
                self._ProcessFileEntryDataStream(mediator, file_entry, None)

    finally:
        final_reference_count = (
            mediator.resolver_context.GetFileObjectReferenceCount(
                file_entry.path_spec))

        # Clean up after parsers that do not call close explicitly.
        if (initial_reference_count != final_reference_count and
                mediator.resolver_context.ForceRemoveFileObject(
                    file_entry.path_spec)):
            logger.warning(
                'File-object not explicitly closed for file: {0:s}'.format(
                    display_name))

    logger.debug(
        '[ProcessFileEntry] done processing file entry: {0:s}'.format(
            display_name))
def ReadSystemConfigurationArtifact(self, system_configuration, session_identifier=None):
    """Copies the values of a system configuration artifact.

    Note that this overwrites existing values in the knowledge base.

    Args:
      system_configuration (SystemConfigurationArtifact): system
          configuration artifact.
      session_identifier (Optional[str]): session identifier, where None
          represents the active session.
    """
    session_identifier = session_identifier or self._active_session

    code_page = system_configuration.code_page
    if code_page:
        try:
            self.SetCodepage(code_page)
        except ValueError:
            logger.warning(
                'Unsupported codepage: {0:s}, defaulting to {1:s}'.format(
                    code_page, self._codepage))

    self._hostnames[session_identifier] = system_configuration.hostname

    for value_name in (
            'keyboard_layout', 'operating_system',
            'operating_system_product', 'operating_system_version'):
        self.SetValue(value_name, getattr(system_configuration, value_name))

    # Set the available time zones before the system time zone so that
    # localized time zone names can be mapped to their corresponding Python
    # time zone.
    self._available_time_zones[session_identifier] = dict(
        (available_time_zone.name, available_time_zone)
        for available_time_zone in system_configuration.available_time_zones)

    time_zone = system_configuration.time_zone
    if time_zone:
        try:
            self.SetTimeZone(time_zone)
        except ValueError:
            logger.warning(
                'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                    time_zone, self.timezone.zone))

    self._user_accounts[session_identifier] = dict(
        (account.identifier, account)
        for account in system_configuration.user_accounts)

    self._windows_eventlog_providers[session_identifier] = dict(
        (eventlog_provider.log_source, eventlog_provider)
        for eventlog_provider in (
            system_configuration.windows_eventlog_providers))
def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0):
    """Generates the path specifications found in a directory.

    The entries of the directory itself are handled first, sub directories
    are remembered and only descended into afterwards. Unallocated entries,
    links and the TSK virtual /$OrphanFiles directory are ignored.

    Args:
      file_entry (dfvfs.FileEntry): file entry that refers to the directory.
      depth (Optional[int]): current depth where 0 represents the file system
          root.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in the
          directory.

    Raises:
      MaximumRecursionDepth: when the maximum recursion depth is reached.
    """
    if depth >= self._MAXIMUM_DEPTH:
        raise errors.MaximumRecursionDepth('Maximum recursion depth reached.')

    # Need to do a breadth-first search otherwise we'll hit the Python
    # maximum recursion depth.
    pending_directories = []

    for child_entry in file_entry.sub_file_entries:
        try:
            ignore_entry = (
                not child_entry.IsAllocated() or child_entry.IsLink())
        except dfvfs_errors.BackEndError as exception:
            location = self._GetPathSpecificationString(child_entry.path_spec)
            logger.warning(
                'Unable to process file: {0:s} with error: {1!s}'.format(
                    location.replace('\n', ';'), exception))
            continue

        if ignore_entry:
            continue

        # For TSK-based file entries only, ignore the virtual /$OrphanFiles
        # directory.
        if (child_entry.type_indicator ==
                dfvfs_definitions.TYPE_INDICATOR_TSK and
                file_entry.IsRoot() and child_entry.name == '$OrphanFiles'):
            continue

        if child_entry.IsDirectory():
            pending_directories.append(child_entry)

        for extracted_path_spec in self._ExtractPathSpecsFromFile(
                child_entry):
            yield extracted_path_spec

    next_depth = depth + 1
    for directory_entry in pending_directories:
        try:
            for extracted_path_spec in self._ExtractPathSpecsFromDirectory(
                    directory_entry, depth=next_depth):
                yield extracted_path_spec

        except (
                IOError, dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
                dfvfs_errors.PathSpecError) as exception:
            logger.warning('{0!s}'.format(exception))
def BuildFindSpecs(self, path_filters, environment_variables=None):
    """Builds find specifications from path filters.

    Every path of every path filter is split into path segments, legacy
    {name} path segments are rewritten into %name% and paths that contain a
    %name% path segment are expanded. The resulting find specification is
    appended to excluded_file_system_find_specs or
    included_file_system_find_specs depending on the filter type. Paths that
    are not absolute, are empty or end in an empty path segment are logged
    with a warning and skipped.

    Args:
      path_filters (list[PathFilter]): path filters.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    for path_filter in path_filters:
        # Since paths are regular expression the path separator is escaped.
        # The separator only depends on the path filter, hence it is
        # determined once per filter instead of once per path.
        if path_filter.path_separator == '\\':
            path_separator = '\\\\'
        else:
            path_separator = path_filter.path_separator

        for path in path_filter.paths:
            expand_path = False
            path_segments = path.split(path_separator)
            for index, path_segment in enumerate(path_segments):
                if len(path_segment) <= 2:
                    continue

                if path_segment[0] == '{' and path_segment[-1] == '}':
                    # Rewrite legacy path expansion attributes, such as
                    # {systemroot} into %SystemRoot%.
                    path_segment = '%{0:s}%'.format(path_segment[1:-1])
                    path_segments[index] = path_segment

                if path_segment[0] == '%' and path_segment[-1] == '%':
                    expand_path = True

            if expand_path:
                path_segments = (
                    path_helper.PathHelper.ExpandWindowsPathSegments(
                        path_segments, environment_variables))

            if path_segments[0] != '':
                logger.warning((
                    'The path filter must be defined as an absolute path: '
                    '{0:s}').format(path))
                continue

            # Strip the root path segment.
            path_segments.pop(0)

            # An empty path is split into a single empty path segment, which
            # leaves no path segments once the root path segment is stripped.
            # Check for that first so that indexing cannot raise IndexError.
            if not path_segments or not path_segments[-1]:
                logger.warning(
                    'Empty last path segment in path: {0:s}'.format(path))
                continue

            find_spec = file_system_searcher.FindSpec(
                case_sensitive=False, location_regex=path_segments)

            if path_filter.filter_type == PathFilter.FILTER_TYPE_EXCLUDE:
                self.excluded_file_system_find_specs.append(find_spec)

            elif path_filter.filter_type == PathFilter.FILTER_TYPE_INCLUDE:
                self.included_file_system_find_specs.append(find_spec)
def BuildFindSpecs(self, path_filters, environment_variables=None):
    """Builds find specifications from path filters.

    Every path of every path filter is split into path segments, legacy
    {name} path segments are rewritten into %name% and paths that contain a
    %name% path segment are expanded. The resulting find specification is
    appended to excluded_file_system_find_specs or
    included_file_system_find_specs depending on the filter type. Paths that
    are not absolute, are empty or end in an empty path segment are logged
    with a warning and skipped.

    Args:
      path_filters (list[PathFilter]): path filters.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    for path_filter in path_filters:
        # Since paths are regular expression the path separator is escaped.
        # The separator only depends on the path filter, hence it is
        # determined once per filter instead of once per path.
        if path_filter.path_separator == '\\':
            path_separator = '\\\\'
        else:
            path_separator = path_filter.path_separator

        for path in path_filter.paths:
            expand_path = False
            path_segments = path.split(path_separator)
            for index, path_segment in enumerate(path_segments):
                if len(path_segment) <= 2:
                    continue

                if path_segment[0] == '{' and path_segment[-1] == '}':
                    # Rewrite legacy path expansion attributes. e.g.
                    # {systemroot}, into %SystemRoot%.
                    path_segment = '%{0:s}%'.format(path_segment[1:-1])
                    path_segments[index] = path_segment

                if path_segment[0] == '%' and path_segment[-1] == '%':
                    expand_path = True

            if expand_path:
                path_segments = (
                    path_helper.PathHelper.ExpandWindowsPathSegments(
                        path_segments, environment_variables))

            if path_segments[0] != '':
                logger.warning((
                    'The path filter must be defined as an absolute path: '
                    '{0:s}').format(path))
                continue

            # Strip the root path segment.
            path_segments.pop(0)

            # An empty path is split into a single empty path segment, which
            # leaves no path segments once the root path segment is stripped.
            # Check for that first so that indexing cannot raise IndexError.
            if not path_segments or not path_segments[-1]:
                logger.warning(
                    'Empty last path segment in path: {0:s}'.format(path))
                continue

            find_spec = file_system_searcher.FindSpec(
                location_regex=path_segments, case_sensitive=False)

            if path_filter.filter_type == PathFilter.FILTER_TYPE_EXCLUDE:
                self.excluded_file_system_find_specs.append(find_spec)

            elif path_filter.filter_type == PathFilter.FILTER_TYPE_INCLUDE:
                self.included_file_system_find_specs.append(find_spec)
def BuildFindSpecsFromFileArtifact(self, source_path, path_separator, environment_variables, user_accounts):
    """Creates find specifications for the path of a file source.

    The source path is passed through recursive glob expansion and users home
    directory expansion, and paths that contain a "%" are additionally passed
    through Windows path expansion. Paths that are not absolute, or for which
    no find specification can be built, are logged and skipped.

    Args:
      source_path (str): file system path defined by the source.
      path_separator (str): file system path segment separator.
      environment_variables (list[str]): environment variable attributes used
          to dynamically populate environment variables in key.
      user_accounts (list[str]): identified user accounts stored in the
          knowledge base.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.
    """
    results = []
    expanded_globs = path_helper.PathHelper.ExpandRecursiveGlobs(
        source_path, path_separator)

    for expanded_glob in expanded_globs:
        for path in path_helper.PathHelper.ExpandUsersHomeDirectoryPath(
                expanded_glob, user_accounts):
            if '%' in path:
                path = path_helper.PathHelper.ExpandWindowsPath(
                    path, environment_variables)

            if not path.startswith(path_separator):
                logger.warning(
                    'The path filter must be defined as an absolute path: '
                    '"{0:s}"'.format(path))
                continue

            # Convert the path into path segments without the initial root
            # entry.
            segments = path.split(path_separator)[1:]

            if not segments[-1]:
                logger.warning(
                    'Empty last path segment in path filter: "{0:s}"'.format(
                        path))
                del segments[-1]

            try:
                file_find_spec = file_system_searcher.FindSpec(
                    location_glob=segments, case_sensitive=False)
            except ValueError as exception:
                logger.error(
                    'Unable to build find specification for path: "{0:s}" '
                    'with error: {1!s}'.format(path, exception))
            else:
                results.append(file_find_spec)

    return results
def _ProcessFileEntry(self, mediator, file_entry):
    """Processes the data streams, or the metadata, of a file entry.

    A metadata file is passed to _ProcessMetadataFile. Otherwise every data
    stream that cannot be skipped is processed, and when none was processed
    the file entry is processed once without a data stream. Afterwards the
    file object of the file entry is force removed from the resolver context
    if its reference count changed during processing.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry.
    """
    resolver_context = mediator.resolver_context
    display_name = mediator.GetDisplayName()

    logger.debug(
        '[ProcessFileEntry] processing file entry: {0:s}'.format(
            display_name))

    count_before = resolver_context.GetFileObjectReferenceCount(
        file_entry.path_spec)

    try:
        if not self._IsMetadataFile(file_entry):
            processed_any_stream = False

            for stream in file_entry.data_streams:
                if self._abort:
                    break

                if self._CanSkipDataStream(file_entry, stream):
                    skip_message = (
                        '[ProcessFileEntry] Skipping datastream {0:s} for '
                        '{1:s}: {2:s}').format(
                            stream.name, file_entry.type_indicator,
                            display_name)
                    logger.debug(skip_message)
                else:
                    self._ProcessFileEntryDataStream(
                        mediator, file_entry, stream)
                    processed_any_stream = True

            if not processed_any_stream:
                # For when the file entry does not contain a data stream.
                self._ProcessFileEntryDataStream(mediator, file_entry, None)

        else:
            self._ProcessMetadataFile(mediator, file_entry)

    finally:
        count_after = mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec)

        # Clean up after parsers that do not call close explicitly.
        if (count_before != count_after and
                mediator.resolver_context.ForceRemoveFileObject(
                    file_entry.path_spec)):
            logger.warning(
                'File-object not explicitly closed for file: {0:s}'.format(
                    display_name))

    logger.debug(
        '[ProcessFileEntry] done processing file entry: {0:s}'.format(
            display_name))
def _ExtractPathSpecsFromFileSystem(self, path_spec, find_specs=None, recurse_file_system=True, resolver_context=None):
    """Generates the path specifications found in a file system.

    With find specifications the file system is searched, otherwise it is
    either traversed from the file entry of the path specification or, when
    recursion is disabled, only the path specification itself is produced.
    The file system is closed once extraction is done.

    Args:
      path_spec (dfvfs.PathSpec): path specification of the root of
          the file system.
      find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction.
      recurse_file_system (Optional[bool]): True if extraction should
          recurse into a file system.
      resolver_context (Optional[dfvfs.Context]): resolver context.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in
          the file system.
    """
    try:
        opened_file_system = path_spec_resolver.Resolver.OpenFileSystem(
            path_spec, resolver_context=resolver_context)
    except (
            dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
            dfvfs_errors.PathSpecError) as exception:
        logger.error(
            'Unable to open file system with error: {0!s}'.format(exception))
        return

    if not opened_file_system:
        return

    try:
        # Select the source of path specifications first, then yield them.
        if find_specs:
            searcher = file_system_searcher.FileSystemSearcher(
                opened_file_system, path_spec)
            path_spec_source = searcher.Find(find_specs=find_specs)

        elif recurse_file_system:
            root_entry = opened_file_system.GetFileEntryByPathSpec(path_spec)
            if root_entry:
                path_spec_source = self._ExtractPathSpecsFromDirectory(
                    root_entry)
            else:
                path_spec_source = ()

        else:
            path_spec_source = (path_spec, )

        for found_path_spec in path_spec_source:
            yield found_path_spec

    except (
            dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
            dfvfs_errors.PathSpecError) as exception:
        logger.warning('{0!s}'.format(exception))

    finally:
        opened_file_system.Close()
def _BuildFindSpecsFromFileSourcePath(self, source_path, path_separator, environment_variables, user_accounts):
    """Creates find specifications for the path of a file source.

    The source path is passed through glob star expansion and users variable
    expansion, and paths that contain a "%" are additionally passed through
    Windows path expansion. Paths that are not absolute, or for which no find
    specification can be built, are logged and skipped.

    Args:
      source_path (str): file system path defined by the source.
      path_separator (str): file system path segment separator.
      environment_variables (list[str]): environment variable attributes used
          to dynamically populate environment variables in key.
      user_accounts (list[str]): identified user accounts stored in the
          knowledge base.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.
    """
    results = []

    for expanded_glob in path_helper.PathHelper.ExpandGlobStars(
            source_path, path_separator):
        logger.debug(
            'building find spec from path glob: {0:s}'.format(expanded_glob))

        user_paths = path_helper.PathHelper.ExpandUsersVariablePath(
            expanded_glob, path_separator, user_accounts)
        for path in user_paths:
            logger.debug('building find spec from path: {0:s}'.format(path))

            if '%' in path:
                path = path_helper.PathHelper.ExpandWindowsPath(
                    path, environment_variables)
                logger.debug(
                    'building find spec from expanded path: {0:s}'.format(
                        path))

            if not path.startswith(path_separator):
                logger.warning(
                    'The path filter must be defined as an absolute path: '
                    '"{0:s}"'.format(path))
                continue

            try:
                file_find_spec = file_system_searcher.FindSpec(
                    case_sensitive=False, location_glob=path,
                    location_separator=path_separator)
            except ValueError as exception:
                logger.error(
                    'Unable to build find specification for path: "{0:s}" '
                    'with error: {1!s}'.format(path, exception))
            else:
                results.append(file_find_spec)

    return results
def BuildFindSpecsFromFileArtifact( self, source_path, path_separator, environment_variables, user_accounts):
    """Creates find specifications for the path of a file source.

    The source path is passed through recursive glob expansion and users home
    directory expansion, and paths that contain a "%" are additionally passed
    through Windows path expansion. Paths that are not absolute, or for which
    no find specification can be built, are logged and skipped.

    Args:
      source_path (str): file system path defined by the source.
      path_separator (str): file system path segment separator.
      environment_variables (list[str]): environment variable attributes used
          to dynamically populate environment variables in key.
      user_accounts (list[str]): identified user accounts stored in the
          knowledge base.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.
    """
    built_find_specs = []

    for recursive_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
            source_path, path_separator):
        home_directory_paths = (
            path_helper.PathHelper.ExpandUsersHomeDirectoryPath(
                recursive_glob, path_separator, user_accounts))

        for path in home_directory_paths:
            if '%' in path:
                path = path_helper.PathHelper.ExpandWindowsPath(
                    path, environment_variables)

            is_absolute = path.startswith(path_separator)
            if not is_absolute:
                logger.warning(
                    'The path filter must be defined as an absolute path: '
                    '"{0:s}"'.format(path))
                continue

            # Convert the path into path segments and drop the initial root
            # entry.
            path_segments = path.split(path_separator)
            del path_segments[0]

            if not path_segments[-1]:
                logger.warning(
                    'Empty last path segment in path filter: "{0:s}"'.format(
                        path))
                del path_segments[-1]

            try:
                built_find_spec = file_system_searcher.FindSpec(
                    location_glob=path_segments, case_sensitive=False)
            except ValueError as exception:
                logger.error(
                    'Unable to build find specification for path: "{0:s}" '
                    'with error: {1!s}'.format(path, exception))
                continue

            built_find_specs.append(built_find_spec)

    return built_find_specs
def ReadSystemConfigurationArtifact(self, system_configuration):
    """Copies the values of a system configuration artifact.

    Note that this overwrites existing values in the knowledge base. Nothing
    is changed when no system configuration is provided.

    Args:
      system_configuration (SystemConfigurationArtifact): system
          configuration artifact.
    """
    if not system_configuration:
        return

    active_session = self._active_session

    code_page = system_configuration.code_page
    if code_page:
        try:
            self.SetCodepage(code_page)
        except ValueError:
            logger.warning(
                'Unsupported codepage: {0:s}, defaulting to {1:s}'.format(
                    code_page, self._codepage))

    self._hostnames[active_session] = system_configuration.hostname

    self.SetValue('keyboard_layout', system_configuration.keyboard_layout)

    language = system_configuration.language
    if language:
        self.SetLanguage(language)

    for value_name in (
            'operating_system', 'operating_system_product',
            'operating_system_version'):
        self.SetValue(value_name, getattr(system_configuration, value_name))

    # Set the available time zones before the system time zone so that
    # localized time zone names can be mapped to their corresponding Python
    # time zone.
    self._available_time_zones[active_session] = dict(
        (available_time_zone.name, available_time_zone)
        for available_time_zone in system_configuration.available_time_zones)

    time_zone = system_configuration.time_zone
    if time_zone:
        try:
            self.SetTimeZone(time_zone)
        except ValueError:
            logger.warning(
                'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                    time_zone, self._time_zone.zone))

    self._user_accounts[active_session] = dict(
        (account.identifier, account)
        for account in system_configuration.user_accounts)
def PopItem(self):
    """Requests an item from the queue and waits for it to arrive.

    If no ZeroMQ socket has been created, one will be created the first
    time this method is called. Both the request and the wait are given up
    once the terminate event is set, in which case nothing is returned.

    Returns:
      object: item from the queue.

    Raises:
      KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
          popping an item.
      QueueEmpty: if the queue is empty, and no item could be popped within
          the queue timeout.
      RuntimeError: if terminate event is missing.
      zmq.error.ZMQError: if an error occurs in ZeroMQ.
    """
    if not self._zmq_socket:
        self._CreateZMQSocket()

    if not self._terminate_event:
        raise RuntimeError('Missing terminate event.')

    logger.debug(
        'Pop on {0:s} queue, port {1:d}'.format(self.name, self.port))

    request_deadline = time.time() + self.timeout_seconds

    # Send the request for an item, retrying until the deadline has passed.
    while not self._terminate_event.is_set():
        try:
            self._zmq_socket.send_pyobj(None)
        except zmq.error.Again:
            # The existing socket is now out of sync, so we need to open a
            # new one.
            self._CreateZMQSocket()
            if time.time() > request_deadline:
                logger.warning(
                    '{0:s} timeout requesting item'.format(self.name))
                raise errors.QueueEmpty
        else:
            break

    # Wait for the requested item to arrive.
    while not self._terminate_event.is_set():
        try:
            return self._ReceiveItemOnActivity(self._zmq_socket)
        except errors.QueueEmpty:
            pass
        except KeyboardInterrupt:
            self.Close(abort=True)
            raise
def _ExtractPathSpecsFromFileSystem( self, path_spec, find_specs=None, recurse_file_system=True, resolver_context=None):
    """Generates the path specifications found in a file system.

    With find specifications the file system is searched, otherwise it is
    either traversed from the file entry of the path specification or, when
    recursion is disabled, only the path specification itself is produced.
    The file system is closed once extraction is done.

    Args:
      path_spec (dfvfs.PathSpec): path specification of the root of
          the file system.
      find_specs (Optional[list[dfvfs.FindSpec]]): find specifications.
      recurse_file_system (Optional[bool]): True if extraction should
          recurse into a file system.
      resolver_context (Optional[dfvfs.Context]): resolver context.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in
          the file system.
    """
    # The same dfVFS errors are handled when opening and when extracting.
    handled_errors = (
        dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
        dfvfs_errors.PathSpecError)

    try:
        source_file_system = path_spec_resolver.Resolver.OpenFileSystem(
            path_spec, resolver_context=resolver_context)
    except handled_errors as exception:
        message = 'Unable to open file system with error: {0!s}'.format(
            exception)
        logger.error(message)
        return

    try:
        if find_specs:
            for match in file_system_searcher.FileSystemSearcher(
                    source_file_system, path_spec).Find(
                        find_specs=find_specs):
                yield match

        elif not recurse_file_system:
            yield path_spec

        else:
            start_entry = source_file_system.GetFileEntryByPathSpec(path_spec)
            if start_entry:
                for match in self._ExtractPathSpecsFromDirectory(start_entry):
                    yield match

    except handled_errors as exception:
        logger.warning('{0!s}'.format(exception))

    finally:
        source_file_system.Close()
def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Hands a path specification to the extraction worker.

    No exception raised by the extraction worker leaves this method: a
    keyboard interrupt and a cache full error set the abort flag, any other
    exception is turned into an extraction warning.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    display_name = parser_mediator.GetDisplayNameForPathSpec(path_spec)
    self._current_display_name = display_name

    excluded_find_specs = None
    filters_helper = self.collection_filters_helper
    if filters_helper:
        excluded_find_specs = filters_helper.excluded_file_system_find_specs

    try:
        extraction_worker.ProcessPathSpec(
            parser_mediator, path_spec,
            excluded_find_specs=excluded_find_specs)

    except KeyboardInterrupt:
        self._abort = True

        self._processing_status.aborted = True
        if self._status_update_callback:
            self._status_update_callback(self._processing_status)

    # We cannot recover from a CacheFullError and abort processing when
    # it is raised.
    except dfvfs_errors.CacheFullError:
        # TODO: signal engine of failure.
        self._abort = True
        logger.error(
            'ABORT: detected cache full error while processing '
            'path spec: {0:s}'.format(self._current_display_name))

    # All exceptions need to be caught here to prevent the worker
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
        parser_mediator.ProduceExtractionWarning(
            'unable to process path specification with error: '
            '{0!s}'.format(exception), path_spec=path_spec)

        debug_output = getattr(
            self._processing_configuration, 'debug_output', False)
        if debug_output:
            logger.warning(
                'Unhandled exception while processing path spec: '
                '{0:s}.'.format(self._current_display_name))
            logger.exception(exception)

            pdb.post_mortem()
def _ExtractPathSpecs(self, path_spec, find_specs=None, recurse_file_system=True, resolver_context=None):
    """Generates the path specifications found in a source.

    A source that is a file produces its own path specification, a directory
    or device is handed to _ExtractPathSpecsFromFileSystem. A source that
    cannot be opened, or that is neither of these, is logged and produces
    nothing.

    Args:
      path_spec (dfvfs.PathSpec): path specification.
      find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction.
      recurse_file_system (Optional[bool]): True if extraction should
          recurse into a file system.
      resolver_context (Optional[dfvfs.Context]): resolver context.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in the source.
    """
    try:
        source_entry = path_spec_resolver.Resolver.OpenFileEntry(
            path_spec, resolver_context=resolver_context)
    except (
            dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
            dfvfs_errors.PathSpecError) as exception:
        logger.error(
            'Unable to open file entry with error: {0!s}'.format(exception))
        return

    if not source_entry:
        logger.warning('Unable to open: {0:s}'.format(path_spec.comparable))
        return

    is_supported_source = (
        source_entry.IsDirectory() or source_entry.IsFile() or
        source_entry.IsDevice())
    if not is_supported_source:
        logger.warning(
            'Source path specification not a device, file or directory.\n'
            '{0:s}'.format(path_spec.comparable))
        return

    if source_entry.IsFile():
        yield path_spec
        return

    for found_path_spec in self._ExtractPathSpecsFromFileSystem(
            path_spec, find_specs=find_specs,
            recurse_file_system=recurse_file_system,
            resolver_context=resolver_context):
        yield found_path_spec
def _CheckKeyCompatibility(self, key_path): """Checks if a Windows Registry key path is supported by dfWinReg. Args: key_path (str): path of the Windows Registry key. Returns: bool: True if key is compatible or False if not. """ for key_path_prefix in self._COMPATIBLE_REGISTRY_KEY_PATH_PREFIXES: if key_path.startswith(key_path_prefix): return True logger.warning( 'Prefix of key "{0:s}" is currently not supported'.format(key_path)) return False
def CheckKeyCompatibility(cls, key_path):
  """Determines if a Windows Registry key path is supported by dfWinReg.

  The upper case variant of the key path is compared against the compatible
  key path prefixes.

  Args:
    key_path (str): path of the Windows Registry key.

  Returns:
    bool: True if key is compatible or False if not.
  """
  normalized_key_path = key_path.upper()

  is_compatible = any(
      normalized_key_path.startswith(prefix)
      for prefix in cls._COMPATIBLE_REGISTRY_KEY_PATH_PREFIXES)

  if not is_compatible:
    logger.warning(
        'Key path: "{0:s}" is currently not supported'.format(key_path))

  return is_compatible
def _ExtractPathSpecs(
    self, path_spec, find_specs=None, recurse_file_system=True,
    resolver_context=None):
  """Yields the path specifications found in a specific source.

  Opening failures and sources that are neither a device, file nor directory
  are logged and yield nothing.

  Args:
    path_spec (dfvfs.PathSpec): path specification.
    find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
        used in path specification extraction.
    recurse_file_system (Optional[bool]): True if extraction should
        recurse into a file system.
    resolver_context (Optional[dfvfs.Context]): resolver context.

  Yields:
    dfvfs.PathSpec: path specification of a file entry found in the source.
  """
  open_errors = (
      dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
      dfvfs_errors.PathSpecError)

  try:
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        path_spec, resolver_context=resolver_context)
  except open_errors as exception:
    logger.error(
        'Unable to open file entry with error: {0!s}'.format(exception))
    return

  if not file_entry:
    logger.warning('Unable to open: {0:s}'.format(path_spec.comparable))
    return

  # The checks are evaluated lazily and in this order.
  entry_type_checks = (
      file_entry.IsDirectory, file_entry.IsFile, file_entry.IsDevice)
  if not any(check() for check in entry_type_checks):
    logger.warning((
        'Source path specification not a device, file or directory.\n'
        '{0:s}').format(path_spec.comparable))
    return

  if not file_entry.IsFile():
    for extracted_path_spec in self._ExtractPathSpecsFromFileSystem(
        path_spec, find_specs=find_specs,
        recurse_file_system=recurse_file_system,
        resolver_context=resolver_context):
      yield extracted_path_spec
  else:
    yield path_spec
def Close(self, abort=False):
  """Closes the queue.

  Args:
    abort (Optional[bool]): whether the Close is the result of an abort
        condition. If True, queue contents may be lost.

  Raises:
    QueueAlreadyClosed: if the queue has already been closed and the close
        is not the result of an abort condition.
    RuntimeError: if closed or terminate event is missing.
  """
  if not self._closed_event or not self._terminate_event:
    raise RuntimeError('Missing closed or terminate event.')

  # Determine the state before the closed event is set, otherwise the abort
  # warning below could never be logged.
  was_closed = self._closed_event.is_set()

  if not abort and was_closed:
    raise errors.QueueAlreadyClosed()

  self._closed_event.set()

  if abort:
    if not was_closed:
      logger.warning(
          '{0:s} queue aborting. Contents may be lost.'.format(self.name))

    # We can't determine whether a there might be an operation being
    # performed on the socket in a separate method or thread, so we'll
    # signal that any such operation should cease.
    self._terminate_event.set()

    self._linger_seconds = 0

    if self._zmq_thread:
      logger.debug('[{0:s}] Waiting for thread to exit.'.format(self.name))
      self._zmq_thread.join(timeout=self.timeout_seconds)

      # Thread.isAlive() was removed in Python 3.9, is_alive() is available
      # in both Python 2 and 3.
      if self._zmq_thread.is_alive():
        logger.error((
            '{0:s} ZMQ responder thread did not exit within timeout').format(
                self.name))
  else:
    logger.debug(
        '{0:s} queue closing, will linger for up to {1:d} seconds'.format(
            self.name, self._linger_seconds))
def CheckKeyCompatibility(key_path):
  """Checks if a Windows Registry key path is supported by dfWinReg.

  The upper case variant of the key path is compared against the compatible
  key path prefixes of ArtifactDefinitionsFilterHelper.

  Args:
    key_path (str): path of the Windows Registry key.

  Returns:
    bool: True if key is compatible or False if not.
  """
  # Convert the key path once, outside of the loop, and keep the original
  # value so the warning reports the key path as it was provided.
  key_path_upper = key_path.upper()

  for key_path_prefix in (
      ArtifactDefinitionsFilterHelper._COMPATIBLE_REGISTRY_KEY_PATH_PREFIXES):
    if key_path_upper.startswith(key_path_prefix):
      return True

  logger.warning(
      'Prefix of key "{0:s}" is currently not supported'.format(key_path))
  return False
def ReadSystemConfigurationArtifact(
    self, system_configuration, session_identifier=CURRENT_SESSION):
  """Copies the values of a system configuration artifact.

  The code page, hostname, keyboard layout, operating system values, time
  zone and user accounts are read from the artifact. Note that this
  overwrites existing values in the knowledge base.

  Args:
    system_configuration (SystemConfigurationArtifact): system configuration
        artifact.
    session_identifier (Optional[str]): session identifier, where
        CURRENT_SESSION represents the active session.
  """
  code_page = system_configuration.code_page
  if code_page:
    try:
      self.SetCodepage(code_page)
    except ValueError:
      logger.warning(
          'Unsupported codepage: {0:s}, defaulting to {1:s}'.format(
              code_page, self._codepage))

  self._hostnames[session_identifier] = system_configuration.hostname

  # These knowledge base values share their name with the attribute of
  # the system configuration artifact.
  for value_name in (
      'keyboard_layout', 'operating_system', 'operating_system_product',
      'operating_system_version'):
    self.SetValue(value_name, getattr(system_configuration, value_name))

  time_zone = system_configuration.time_zone
  if time_zone:
    try:
      self.SetTimeZone(time_zone)
    except ValueError:
      logger.warning(
          'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              time_zone, self.timezone.zone))

  accounts_per_username = {}
  for user_account in system_configuration.user_accounts:
    accounts_per_username[user_account.username] = user_account

  self._user_accounts[session_identifier] = accounts_per_username
def ProcessPathSpec(self, mediator, path_spec, excluded_find_specs=None):
  """Processes the file entry that a path specification refers to.

  The processing status is running for the duration of the call and is set
  back to idle before returning. A file entry that cannot be opened or whose
  location compares equal to an excluded find specification is skipped.

  Args:
    mediator (ParserMediator): mediates the interactions between
        parsers and other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    excluded_find_specs (Optional[list[dfvfs.FindSpec]]): find
        specifications that are excluded from processing.
  """
  idle_status = definitions.STATUS_INDICATOR_IDLE

  self.last_activity_timestamp = time.time()
  self.processing_status = definitions.STATUS_INDICATOR_RUNNING

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(
      path_spec, resolver_context=mediator.resolver_context)

  if file_entry is None:
    logger.warning(
        'Unable to open file entry with path spec: {0:s}'.format(
            mediator.GetDisplayNameForPathSpec(path_spec)))
    self.processing_status = idle_status
    return

  # Stops at the first find specification that matches the location.
  is_excluded = any(
      find_spec.CompareLocation(file_entry)
      for find_spec in excluded_find_specs or ())
  if is_excluded:
    logger.info(
        'Skipped: {0:s} because of exclusion filter.'.format(
            file_entry.path_spec.location))
    self.processing_status = idle_status
    return

  mediator.SetFileEntry(file_entry)

  try:
    if file_entry.IsDirectory():
      self._ProcessDirectory(mediator, file_entry)
    self._ProcessFileEntry(mediator, file_entry)

  finally:
    mediator.ResetFileEntry()

    self.last_activity_timestamp = time.time()
    self.processing_status = idle_status
def BuildFindSpecs(self, environment_variables=None):
  """Builds find specifications from artifact definitions.

  The find specifications are grouped per artifact source type, where
  dfWinReg find specifications are stored as Windows Registry key sources
  and dfVFS find specifications as file sources. The resulting find
  specifications are set in the knowledge base.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  find_specs = []
  for name in self._artifacts:
    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      logger.debug('undefined artifact definition: {0:s}'.format(name))
      continue

    logger.debug(
        'building find spec from artifact definition: {0:s}'.format(name))
    artifact_find_specs = self._BuildFindSpecsFromArtifact(
        definition, environment_variables)
    find_specs.extend(artifact_find_specs)

  find_specs_per_source_type = defaultdict(list)
  for find_spec in find_specs:
    if isinstance(find_spec, registry_searcher.FindSpec):
      source_type = artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY
    elif isinstance(find_spec, file_system_searcher.FindSpec):
      source_type = artifact_types.TYPE_INDICATOR_FILE
    else:
      # Use the !s conversion since a type object does not support the
      # "s" format specification on Python 3.
      logger.warning('Unknown find specification type: {0!s}'.format(
          type(find_spec)))
      continue

    find_specs_per_source_type[source_type].append(find_spec)

  self._knowledge_base.SetValue(
      self.KNOWLEDGE_BASE_VALUE, find_specs_per_source_type)
def ReadSystemConfigurationArtifact(
    self, system_configuration, session_identifier=CURRENT_SESSION):
  """Reads the knowledge base values from a system configuration artifact.

  An unsupported code page or time zone is logged as a warning. Note that
  this overwrites existing values in the knowledge base.

  Args:
    system_configuration (SystemConfigurationArtifact): system configuration
        artifact.
    session_identifier (Optional[str]): session identifier, where
        CURRENT_SESSION represents the active session.
  """
  artifact = system_configuration

  if artifact.code_page:
    try:
      self.SetCodepage(artifact.code_page)
    except ValueError:
      warning_message = (
          'Unsupported codepage: {0:s}, defaulting to {1:s}'.format(
              artifact.code_page, self._codepage))
      logger.warning(warning_message)

  self._hostnames[session_identifier] = artifact.hostname

  keyboard_layout = artifact.keyboard_layout
  self.SetValue('keyboard_layout', keyboard_layout)

  operating_system = artifact.operating_system
  self.SetValue('operating_system', operating_system)

  operating_system_product = artifact.operating_system_product
  self.SetValue('operating_system_product', operating_system_product)

  operating_system_version = artifact.operating_system_version
  self.SetValue('operating_system_version', operating_system_version)

  if artifact.time_zone:
    try:
      self.SetTimeZone(artifact.time_zone)
    except ValueError:
      warning_message = (
          'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              artifact.time_zone, self.timezone.zone))
      logger.warning(warning_message)

  # Map every user account to its username.
  self._user_accounts[session_identifier] = dict(
      (user_account.username, user_account)
      for user_account in artifact.user_accounts)
def ProcessPathSpec(self, mediator, path_spec, excluded_find_specs=None):
  """Processes a path specification.

  The processing status is set to running at the start and back to idle on
  every return path, including when the file entry cannot be opened or is
  skipped because of an exclusion filter.

  Args:
    mediator (ParserMediator): mediates the interactions between
        parsers and other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    excluded_find_specs (Optional[list[dfvfs.FindSpec]]): find
        specifications that are excluded from processing.
  """
  self.last_activity_timestamp = time.time()
  self.processing_status = definitions.STATUS_INDICATOR_RUNNING

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(
      path_spec, resolver_context=mediator.resolver_context)

  if file_entry is None:
    display_name = mediator.GetDisplayNameForPathSpec(path_spec)
    logger.warning(
        'Unable to open file entry with path spec: {0:s}'.format(
            display_name))
    self.processing_status = definitions.STATUS_INDICATOR_IDLE
    return

  for find_spec in excluded_find_specs or []:
    if find_spec.Matches(file_entry) == (True, True):
      logger.info('Skipped: {0:s} because of exclusion filter.'.format(
          file_entry.path_spec.location))
      # Without this the status would remain running for a file entry that
      # was skipped.
      self.processing_status = definitions.STATUS_INDICATOR_IDLE
      return

  mediator.SetFileEntry(file_entry)

  try:
    if file_entry.IsDirectory():
      self._ProcessDirectory(mediator, file_entry)
    self._ProcessFileEntry(mediator, file_entry)

  finally:
    mediator.ResetFileEntry()

    self.last_activity_timestamp = time.time()
    self.processing_status = definitions.STATUS_INDICATOR_IDLE
def ExpandGlobStars(cls, path, path_separator):
  """Expands globstars "**" in a path.

  A path segment that starts with a globstar is replaced by 1 up to the
  recursion depth number of "*" path segments, which results in one expanded
  path per depth. A bare "**" uses a recursion depth of 10, a numeric suffix
  such as "**5" defines the recursion depth. A recursion depth of 1 or less,
  or one that exceeds the recursive glob limit, is replaced by that limit.

  Args:
    path (str): path to be expanded.
    path_separator (str): path segment separator.

  Returns:
    list[str]: String path expanded for each glob, or the original path if
        nothing was expanded.
  """
  segments = path.split(path_separator)
  results = []

  for index, segment in enumerate(segments):
    depth = None

    if not segment.startswith('**'):
      if '**' in segment:
        logger.warning((
            'Globstar with prefix "{0:s}" in path "{1:s}" not '
            'supported.').format(segment, path))

    elif segment == '**':
      depth = 10

    else:
      try:
        depth = int(segment[2:], 10)
      except (TypeError, ValueError):
        logger.warning((
            'Globstar with suffix "{0:s}" in path "{1:s}" not '
            'supported.').format(segment, path))

    if depth is None:
      continue

    if not 1 < depth <= cls._RECURSIVE_GLOB_LIMIT:
      logger.warning((
          'Globstar "{0:s}" in path "{1:s}" exceed recursion maximum '
          'recursion depth, limiting to: {2:d}.').format(
              segment, path, cls._RECURSIVE_GLOB_LIMIT))
      depth = cls._RECURSIVE_GLOB_LIMIT

    leading_segments = segments[:index]
    trailing_segments = segments[index + 1:]

    for number_of_wildcards in range(1, depth + 1):
      wildcard_segments = ['*'] * number_of_wildcards
      results.append(path_separator.join(
          leading_segments + wildcard_segments + trailing_segments))

  return results or [path]
def ExpandRecursiveGlobs(cls, path, path_separator):
  """Expands a recursive glob at the end of an artifact path.

  The glob is a path segment separator followed by '**', up to two optional
  digits that define the recursion depth, such as '**10', and an optional
  trailing path segment separator. Without digits the recursive glob limit
  is used as the recursion depth. The expansion itself is done by
  AppendPathEntries, which is told to skip the first entry when the glob is
  followed by a path segment separator.

  Args:
    path (str): path to be expanded.
    path_separator (str): path segment separator.

  Returns:
    list[str]: String path expanded for each glob, or the original path if
        it does not end in a recursive glob.
  """
  escaped_separator = re.escape(path_separator)
  glob_regex = r'(.*)?{0:s}\*\*(\d{{1,2}})?({0:s})?$'.format(
      escaped_separator)

  match = re.search(glob_regex, path)
  if match is None:
    return [path]

  base_path, depth_digits, trailing_separator = match.groups()

  skip_first = bool(trailing_separator)

  if not depth_digits:
    iterations = cls._RECURSIVE_GLOB_LIMIT
    logger.warning((
        'Path "{0:s}" contains fully recursive glob, limiting to 10 '
        'levels').format(path))
  else:
    iterations = int(depth_digits)

  return cls.AppendPathEntries(
      base_path, path_separator, iterations, skip_first)
def ExpandRecursiveGlobs(cls, path, path_separator):
  """Expands recursive like globs present in an artifact path.

  Only a path that ends in a path segment separator and '**' is expanded,
  optionally followed by up to two digits, such as '**10', and a trailing
  path segment separator. The digits are the recursion depth, which defaults
  to the recursive glob limit. A trailing path segment separator is passed
  on to AppendPathEntries as the request to skip the first entry.

  Args:
    path (str): path to be expanded.
    path_separator (str): path segment separator.

  Returns:
    list[str]: String path expanded for each glob.
  """
  glob_pattern = re.compile(
      r'(.*)?{0:s}\*\*(\d{{1,2}})?({0:s})?$'.format(
          re.escape(path_separator)))

  match = glob_pattern.search(path)
  if not match:
    return [path]

  # Group 3 is the optional trailing path segment separator.
  skip_first = True if match.group(3) else False

  recursion_depth = match.group(2)
  if recursion_depth:
    iterations = int(recursion_depth)
  else:
    logger.warning((
        'Path "{0:s}" contains fully recursive glob, limiting to 10 '
        'levels').format(path))
    iterations = cls._RECURSIVE_GLOB_LIMIT

  expanded_paths = cls.AppendPathEntries(
      match.group(1), path_separator, iterations, skip_first)
  return expanded_paths
def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0): """Extracts path specification from a directory. Args: file_entry (dfvfs.FileEntry): file entry that refers to the directory. depth (Optional[int]): current depth where 0 represents the file system root. Yields: dfvfs.PathSpec: path specification of a file entry found in the directory. """ if depth >= self._MAXIMUM_DEPTH: raise errors.MaximumRecursionDepth('Maximum recursion depth reached.') # Need to do a breadth-first search otherwise we'll hit the Python # maximum recursion depth. sub_directories = [] for sub_file_entry in file_entry.sub_file_entries: try: if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink(): continue except dfvfs_errors.BackEndError as exception: logger.warning( 'Unable to process file: {0:s} with error: {1!s}'.format( sub_file_entry.path_spec.comparable.replace( '\n', ';'), exception)) continue # For TSK-based file entries only, ignore the virtual /$OrphanFiles # directory. if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK: if file_entry.IsRoot() and sub_file_entry.name == '$OrphanFiles': continue if sub_file_entry.IsDirectory(): sub_directories.append(sub_file_entry) elif sub_file_entry.IsFile(): # If we are dealing with a VSS we want to calculate a hash # value based on available timestamps and compare that to previously # calculated hash values, and only include the file into the queue if # the hash does not match. 
if self._duplicate_file_check: hash_value = self._CalculateNTFSTimeHash(sub_file_entry) inode = getattr(sub_file_entry.path_spec, 'inode', 0) if inode in self._hashlist: if hash_value in self._hashlist[inode]: continue self._hashlist.setdefault(inode, []).append(hash_value) for path_spec in self._ExtractPathSpecsFromFile(sub_file_entry): yield path_spec for sub_file_entry in sub_directories: try: for path_spec in self._ExtractPathSpecsFromDirectory( sub_file_entry, depth=(depth + 1)): yield path_spec except ( IOError, dfvfs_errors.AccessError, dfvfs_errors.BackEndError, dfvfs_errors.PathSpecError) as exception: logger.warning('{0!s}'.format(exception))
def BuildFindSpecs(self, environment_variables=None):
  """Builds find specifications from artifact definitions.

  Artifact group definitions are expanded into the definitions they name and
  every artifact definition is processed at most once. In self._artifacts
  the name of an artifact group is replaced by the names of its members. The
  resulting find specifications are set in the knowledge base.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  # Iterate over a separate work list, since handling an artifact group
  # changes self._artifacts and adds the names of its members to the work
  # list.
  pending_names = list(self._artifacts)
  processed_names = set()

  position = 0
  while position < len(pending_names):
    name = pending_names[position]
    position += 1

    # Prevents duplicate find specifications and endless expansion of
    # artifact groups that refer to each other.
    if name in processed_names:
      continue
    processed_names.add(name)

    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      continue

    for source in definition.sources:
      if source.type_indicator == artifact_types.TYPE_INDICATOR_FILE:
        # TODO: move source.paths iteration into
        # BuildFindSpecsFromFileArtifact.
        for path_entry in set(source.paths):
          find_specs = self.BuildFindSpecsFromFileArtifact(
              path_entry, source.separator, environment_variables,
              self._knowledge_base.user_accounts)
          artifact_group = self._find_specs_per_source_type[
              artifact_types.TYPE_INDICATOR_FILE]
          artifact_group.extend(find_specs)

      elif (source.type_indicator ==
            artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY):
        # TODO: move source.keys iteration into
        # BuildFindSpecsFromRegistryArtifact.
        for key_path in set(source.keys):
          if self.CheckKeyCompatibility(key_path):
            find_specs = self.BuildFindSpecsFromRegistryArtifact(key_path)
            artifact_group = self._find_specs_per_source_type[
                artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY]
            artifact_group.extend(find_specs)

      elif (source.type_indicator ==
            artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_VALUE):
        # TODO: Handle Registry Values Once Supported in dfwinreg.
        # https://github.com/log2timeline/dfwinreg/issues/98
        logger.warning((
            'Windows Registry values are not supported, extracting key: '
            '"{0!s}"').format(source.key_value_pairs))

        # TODO: move source.key_value_pairs iteration into
        # BuildFindSpecsFromRegistryArtifact.
        key_paths = {
            key_value['key'] for key_value in source.key_value_pairs}
        for key_path in key_paths:
          if self.CheckKeyCompatibility(key_path):
            find_specs = self.BuildFindSpecsFromRegistryArtifact(key_path)
            artifact_group = self._find_specs_per_source_type[
                artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY]
            artifact_group.extend(find_specs)

      elif (source.type_indicator ==
            artifact_types.TYPE_INDICATOR_ARTIFACT_GROUP):
        # Replace the name of the group by the names of its members.
        if name in self._artifacts:
          self._artifacts.remove(name)

        for name_entry in set(source.names):
          if name_entry not in self._artifacts:
            self._artifacts.append(name_entry)
          pending_names.append(name_entry)

      else:
        logger.warning(
            'Unsupported artifact definition source type: "{0:s}"'.format(
                source.type_indicator))

  self._knowledge_base.SetValue(
      self.KNOWLEDGE_BASE_VALUE, self._find_specs_per_source_type)
def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0): """Extracts path specification from a directory. Args: file_entry (dfvfs.FileEntry): file entry that refers to the directory. depth (Optional[int]): current depth where 0 represents the file system root. Yields: dfvfs.PathSpec: path specification of a file entry found in the directory. """ if depth >= self._MAXIMUM_DEPTH: raise errors.MaximumRecursionDepth( 'Maximum recursion depth reached.') # Need to do a breadth-first search otherwise we'll hit the Python # maximum recursion depth. sub_directories = [] for sub_file_entry in file_entry.sub_file_entries: try: if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink(): continue except dfvfs_errors.BackEndError as exception: logger.warning( 'Unable to process file: {0:s} with error: {1!s}'.format( sub_file_entry.path_spec.comparable.replace('\n', ';'), exception)) continue # For TSK-based file entries only, ignore the virtual /$OrphanFiles # directory. if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK: if file_entry.IsRoot( ) and sub_file_entry.name == '$OrphanFiles': continue if sub_file_entry.IsDirectory(): sub_directories.append(sub_file_entry) elif sub_file_entry.IsFile(): # If we are dealing with a VSS we want to calculate a hash # value based on available timestamps and compare that to previously # calculated hash values, and only include the file into the queue if # the hash does not match. 
if self._duplicate_file_check: hash_value = self._CalculateNTFSTimeHash(sub_file_entry) inode = getattr(sub_file_entry.path_spec, 'inode', 0) if inode in self._hashlist: if hash_value in self._hashlist[inode]: continue self._hashlist.setdefault(inode, []).append(hash_value) for path_spec in self._ExtractPathSpecsFromFile(sub_file_entry): yield path_spec for sub_file_entry in sub_directories: try: for path_spec in self._ExtractPathSpecsFromDirectory( sub_file_entry, depth=(depth + 1)): yield path_spec except (IOError, dfvfs_errors.AccessError, dfvfs_errors.BackEndError, dfvfs_errors.PathSpecError) as exception: logger.warning('{0!s}'.format(exception))
def BuildFindSpecs(self, environment_variables=None):
  """Builds find specifications from the path filters in a filter file.

  Every line of the filter file that does not start with "#" is considered
  a path filter. The string values of the environment variables, without
  drive letter and with forward slashes, are used to expand the path
  filters. Path filters that cannot be expanded, are not absolute or have an
  empty last path segment are skipped.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.

  Returns:
    list[dfvfs.FindSpec]: find specification.
  """
  path_attributes = {}
  for environment_variable in environment_variables or ():
    name = environment_variable.name.lower()
    value = environment_variable.value
    if not isinstance(value, py2to3.STRING_TYPES):
      continue

    # Remove the drive letter.
    has_drive_letter = len(value) > 2 and value[1] == ':'
    if has_drive_letter:
      value = value.rpartition(':')[2]

    if value.startswith('\\'):
      value = value.replace('\\', '/')

    path_attributes[name] = value

  find_specs = []
  with open(self._path, 'r') as file_object:
    for raw_line in file_object:
      path_filter = raw_line.strip()
      if path_filter.startswith('#'):
        continue

      if path_attributes:
        try:
          path_filter = path_filter.format(**path_attributes)
        except KeyError as exception:
          logger.error((
              'Unable to expand path filter: {0:s} with error: '
              '{1!s}').format(path_filter, exception))
          continue

      if not path_filter.startswith('/'):
        logger.warning((
            'The path filter must be defined as an absolute path: '
            '{0:s}').format(path_filter))
        continue

      # The first segment is the empty root path segment, which is not
      # part of the location.
      path_segments = path_filter.split('/')[1:]

      if not path_segments[-1]:
        logger.warning(
            'Empty last path segment in path filter: {0:s}'.format(
                path_filter))
        continue

      find_specs.append(file_system_searcher.FindSpec(
          location_regex=path_segments, case_sensitive=False))

  return find_specs
def _ParseFileEntryWithParser(
    self, parser_mediator, parser, file_entry, file_object=None):
  """Runs a specific parser on a file entry.

  The parser is timed when a parsers profiler is available. A warning is
  logged if the reference count of the file-like object of the file entry
  differs before and after parsing.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser (BaseParser): parser.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse.
        If not set the parser will use the parser mediator to open
        the file entry's default data stream as a file-like object.

  Returns:
    bool: False if the file could not be parsed and UnableToParseFile
        was raised.

  Raises:
    TypeError: if parser object is not a supported parser type.
  """
  is_file_entry_parser = isinstance(
      parser, parsers_interface.FileEntryParser)
  if not is_file_entry_parser and not isinstance(
      parser, parsers_interface.FileObjectParser):
    raise TypeError('Unsupported parser object type.')

  def _GetReferenceCount():
    """Retrieves the reference count of the file entry's file object."""
    resolver_context = parser_mediator.resolver_context
    return resolver_context.GetFileObjectReferenceCount(file_entry.path_spec)

  parser_mediator.ClearParserChain()

  count_before = _GetReferenceCount()

  if self._parsers_profiler:
    self._parsers_profiler.StartTiming(parser.NAME)

  message_format = '{0:s} unable to parse file: {1:s} with error: {2!s}'

  parse_result = True
  try:
    # A file entry parser takes precedence over a file-like object parser.
    if is_file_entry_parser:
      parser.Parse(parser_mediator)
    else:
      parser.Parse(parser_mediator, file_object)

  # We catch IOError so we can determine the parser that generated the error.
  except (IOError, dfvfs_errors.BackEndError) as exception:
    logger.warning(message_format.format(
        parser.NAME, parser_mediator.GetDisplayName(file_entry), exception))

  except errors.UnableToParseFile as exception:
    logger.debug(message_format.format(
        parser.NAME, parser_mediator.GetDisplayName(file_entry), exception))
    parse_result = False

  finally:
    if self._parsers_profiler:
      self._parsers_profiler.StopTiming(parser.NAME)

  if _GetReferenceCount() != count_before:
    logger.warning((
        '[{0:s}] did not explicitly close file-object for file: '
        '{1:s}.').format(
            parser.NAME, parser_mediator.GetDisplayName(file_entry)))

  return parse_result
def BuildFindSpecs(self, environment_variables=None):
  """Build find specification from a filter file.

  Lines of the filter file that start with "#" are ignored, every other line
  is treated as a path filter. If environment variables with a string value
  are provided they are used to expand the path filters, after removing the
  drive letter and converting backslashes into forward slashes.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.

  Returns:
    list[dfvfs.FindSpec]: find specification.
  """
  path_attributes = {}
  if environment_variables:
    for environment_variable in environment_variables:
      attribute_name = environment_variable.name.lower()
      attribute_value = environment_variable.value
      if not isinstance(attribute_value, py2to3.STRING_TYPES):
        continue

      # Remove the drive letter.
      if len(attribute_value) > 2 and attribute_value[1] == ':':
        _, _, attribute_value = attribute_value.rpartition(':')

      if attribute_value.startswith('\\'):
        attribute_value = attribute_value.replace('\\', '/')

      path_attributes[attribute_name] = attribute_value

  find_specs = []
  with open(self._path, 'r') as file_object:
    for line in file_object:
      line = line.strip()
      if line.startswith('#'):
        continue

      if path_attributes:
        try:
          line = line.format(**path_attributes)
        except KeyError as exception:
          # Use the !s conversion since an exception does not support
          # the "s" format specification on Python 3.
          logger.error((
              'Unable to expand path filter: {0:s} with error: '
              '{1!s}').format(line, exception))
          continue

      if not line.startswith('/'):
        logger.warning((
            'The path filter must be defined as an absolute path: '
            '{0:s}').format(line))
        continue

      # Convert the path filters into a list of path segments and strip
      # the root path segment.
      path_segments = line.split('/')
      path_segments.pop(0)

      if not path_segments[-1]:
        logger.warning(
            'Empty last path segment in path filter: {0:s}'.format(line))
        continue

      find_spec = file_system_searcher.FindSpec(
          location_regex=path_segments, case_sensitive=False)
      find_specs.append(find_spec)

  return find_specs
def _BuildFindSpecsFromArtifact(self, definition, environment_variables): """Builds find specifications from an artifact definition. Args: definition (artifacts.ArtifactDefinition): artifact definition. environment_variables (list[EnvironmentVariableArtifact]): environment variables. Returns: list[dfvfs.FindSpec|dfwinreg.FindSpec]: dfVFS or dfWinReg find specifications. """ find_specs = [] for source in definition.sources: if source.type_indicator == artifact_types.TYPE_INDICATOR_FILE: for path_entry in set(source.paths): specifications = self._BuildFindSpecsFromFileSourcePath( path_entry, source.separator, environment_variables, self._knowledge_base.user_accounts) find_specs.extend(specifications) self.file_system_artifact_names.add(definition.name) elif (source.type_indicator == artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY): for key_path in set(source.keys): if ArtifactDefinitionsFiltersHelper.CheckKeyCompatibility(key_path): specifications = self._BuildFindSpecsFromRegistrySourceKey(key_path) find_specs.extend(specifications) self.registry_artifact_names.add(definition.name) elif (source.type_indicator == artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_VALUE): # TODO: Handle Registry Values Once Supported in dfwinreg. # https://github.com/log2timeline/dfwinreg/issues/98 # Use set-comprehension to create a set of the source key paths. 
key_paths = { key_value['key'] for key_value in source.key_value_pairs} key_paths_string = ', '.join(key_paths) logger.warning(( 'Windows Registry values are not supported, extracting keys: ' '"{0!s}"').format(key_paths_string)) for key_path in key_paths: if ArtifactDefinitionsFiltersHelper.CheckKeyCompatibility(key_path): specifications = self._BuildFindSpecsFromRegistrySourceKey(key_path) find_specs.extend(specifications) self.registry_artifact_names.add(definition.name) elif (source.type_indicator == artifact_types.TYPE_INDICATOR_ARTIFACT_GROUP): for name in source.names: specifications = self._BuildFindSpecsFromGroupName( name, environment_variables) find_specs.extend(specifications) else: logger.warning( 'Unsupported artifact definition source type: "{0:s}"'.format( source.type_indicator)) return find_specs
def BuildFindSpecs(self, environment_variables=None):
  """Builds find specifications from artifact definitions.

  Find specifications are built for file, Windows Registry key and Windows
  Registry value sources, other source types are logged as unsupported. For
  Windows Registry value sources only the key paths are used. The resulting
  find specifications are set in the knowledge base.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  find_specs_per_source_type = {
      artifact_types.TYPE_INDICATOR_FILE: [],
      artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY: []}

  for name in self._artifacts:
    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      continue

    for source in definition.sources:
      if source.type_indicator == artifact_types.TYPE_INDICATOR_FILE:
        # TODO: move source.paths iteration into
        # BuildFindSpecsFromFileArtifact.
        for path_entry in set(source.paths):
          find_specs = self.BuildFindSpecsFromFileArtifact(
              path_entry, source.separator, environment_variables,
              self._knowledge_base.user_accounts)
          find_specs_per_source_type[
              artifact_types.TYPE_INDICATOR_FILE].extend(find_specs)

      elif (source.type_indicator ==
            artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY):
        # TODO: move source.keys iteration into
        # BuildFindSpecsFromRegistryArtifact.
        for key_path in set(source.keys):
          if self._CheckKeyCompatibility(key_path):
            find_specs = self.BuildFindSpecsFromRegistryArtifact(key_path)
            find_specs_per_source_type[
                artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY].extend(
                    find_specs)

      elif (source.type_indicator ==
            artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_VALUE):
        # TODO: Handle Registry Values Once Supported in dfwinreg.
        # https://github.com/log2timeline/dfwinreg/issues/98
        logger.warning((
            'Windows Registry values are not supported, extracting key: '
            '"{0!s}"').format(source.key_value_pairs))

        # TODO: move source.key_value_pairs iteration into
        # BuildFindSpecsFromRegistryArtifact.

        # Read the key path from the "key" entry of every key and value
        # pair, tuple unpacking a pair does not yield the key path.
        # NOTE(review): assumes every pair is a mapping with a "key" entry,
        # confirm against the artifacts library.
        key_paths = {
            key_value['key'] for key_value in source.key_value_pairs}
        for key_path in key_paths:
          if self._CheckKeyCompatibility(key_path):
            find_specs = self.BuildFindSpecsFromRegistryArtifact(key_path)
            find_specs_per_source_type[
                artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY].extend(
                    find_specs)

      else:
        logger.warning(
            'Unsupported artifact definition source type: "{0:s}"'.format(
                source.type_indicator))

  self._knowledge_base.SetValue(
      self._KNOWLEDGE_BASE_VALUE, find_specs_per_source_type)
def _ParseFileEntryWithParser(
    self, parser_mediator, parser, file_entry, file_object=None):
  """Runs a single parser against a file entry.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser (BaseParser): parser.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse.
        If not set the parser will use the parser mediator to open
        the file entry's default data stream as a file-like object.

  Returns:
    int: parse result which is _PARSE_RESULT_FAILURE if the file entry
        could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
        successfully was parsed or _PARSE_RESULT_UNSUPPORTED when
        UnableToParseFile was raised.

  Raises:
    TypeError: if parser object is not a supported parser type.
  """
  supported_parser_types = (
      parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)
  if not isinstance(parser, supported_parser_types):
    raise TypeError('Unsupported parser object type.')

  # Both failure handlers below report with the same message layout.
  failure_format = '{0:s} unable to parse file: {1:s} with error: {2!s}'

  parser_mediator.ClearParserChain()

  # Remember how many references to the file object exist before parsing so
  # that a parser that leaves the file object open can be reported afterwards.
  count_before = parser_mediator.resolver_context.GetFileObjectReferenceCount(
      file_entry.path_spec)

  parser_mediator.SampleStartTiming(parser.NAME)

  try:
    if isinstance(parser, parsers_interface.FileEntryParser):
      parser.Parse(parser_mediator)
    else:
      # The type check above leaves FileObjectParser as the only alternative.
      parser.Parse(parser_mediator, file_object)

  # We catch IOError so we can determine the parser that generated the error.
  except (IOError, dfvfs_errors.BackEndError) as exception:
    logger.warning(failure_format.format(
        parser.NAME, parser_mediator.GetDisplayName(file_entry), exception))
    result = self._PARSE_RESULT_FAILURE

  except errors.UnableToParseFile as exception:
    logger.debug(failure_format.format(
        parser.NAME, parser_mediator.GetDisplayName(file_entry), exception))
    result = self._PARSE_RESULT_UNSUPPORTED

  else:
    result = self._PARSE_RESULT_SUCCESS

  finally:
    parser_mediator.SampleStopTiming(parser.NAME)
    parser_mediator.SampleMemoryUsage(parser.NAME)

    count_after = parser_mediator.resolver_context.GetFileObjectReferenceCount(
        file_entry.path_spec)
    if count_after != count_before:
      logger.warning((
          '[{0:s}] did not explicitly close file-object for file: '
          '{1:s}.').format(
              parser.NAME, parser_mediator.GetDisplayName(file_entry)))

  return result