def testBuildFindSpecs(self):
    """Tests the BuildFindSpecs function.

    Writes filter expressions to a temporary filter file, builds find
    specifications from it and checks how many path specifications they
    match using an OS file system searcher opened at location '.'. Also
    checks that building find specifications from a non-existing filter
    file raises IOError.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        # The file is created with delete=False, so remember its path to be
        # able to remove it once the find specifications have been built.
        filter_file_path = temp_file.name
        test_filter_file = filter_file.FilterFile(filter_file_path)
        # 2 hits.
        temp_file.write(b'/test_data/testdir/filter_.+.txt\n')
        # A single hit.
        temp_file.write(b'/test_data/.+evtx\n')
        # A single hit.
        temp_file.write(b'/AUTHORS\n')
        temp_file.write(b'/does_not_exist/some_file_[0-9]+txt\n')
        # Path expansion.
        temp_file.write(b'{systemroot}/Tasks/.+[.]job\n')
        # This should not compile properly, missing file information.
        temp_file.write(b'failing/\n')
        # This should not fail during initial loading, but fail later on.
        temp_file.write(b'bad re (no close on that parenthesis/file\n')

    environment_variable = artifacts.EnvironmentVariableArtifact(
        case_sensitive=False, name='SystemRoot', value='C:\\Windows')

    try:
        find_specs = test_filter_file.BuildFindSpecs(
            environment_variables=[environment_variable])
    finally:
        # Remove the temporary filter file even if BuildFindSpecs raises.
        try:
            os.remove(filter_file_path)
        except (OSError, IOError) as exception:
            logging.warning(
                'Unable to remove filter file: {0:s} with error: {1!s}'.format(
                    filter_file_path, exception))

    self.assertEqual(len(find_specs), 5)

    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location='.')
    file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
    # Close the file system even when one of the assertions below fails.
    try:
        searcher = file_system_searcher.FileSystemSearcher(
            file_system, path_spec)

        path_spec_generator = searcher.Find(find_specs=find_specs)
        self.assertIsNotNone(path_spec_generator)

        path_specs = list(path_spec_generator)
        # Two evtx, one symbolic link to evtx, one AUTHORS, two filter_*.txt
        # files, total 6 path specifications.
        self.assertEqual(len(path_specs), 6)

        with self.assertRaises(IOError):
            test_filter_file = filter_file.FilterFile('thisfiledoesnotexist')
            test_filter_file.BuildFindSpecs()
    finally:
        file_system.Close()
def testBuildFindSpecs(self):
    """Tests the BuildFindSpecs function of the path filters helper.

    Path filters are read from the in-memory filter file data, turned into
    find specifications and matched with an OS file system searcher opened
    at location '.'.
    """
    filter_file_reader = filter_file.FilterFile()
    filter_data_object = io.StringIO(self._FILTER_FILE_DATA)
    parsed_path_filters = filter_file_reader._ReadFromFileObject(
        filter_data_object)

    system_root_variable = artifacts.EnvironmentVariableArtifact(
        case_sensitive=False, name='SystemRoot', value='C:\\Windows')

    filters_helper = path_filters.PathCollectionFiltersHelper()
    filters_helper.BuildFindSpecs(
        parsed_path_filters, environment_variables=[system_root_variable])

    number_of_find_specs = len(
        filters_helper.included_file_system_find_specs)
    self.assertEqual(number_of_find_specs, 5)

    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location='.')
    os_file_system = path_spec_resolver.Resolver.OpenFileSystem(os_path_spec)
    os_searcher = file_system_searcher.FileSystemSearcher(
        os_file_system, os_path_spec)

    matches_generator = os_searcher.Find(
        find_specs=filters_helper.included_file_system_find_specs)
    self.assertIsNotNone(matches_generator)

    # Exhaust the generator before the file system is closed.
    matched_path_specs = list(matches_generator)

    os_file_system.Close()

    # Two evtx, one symbolic link to evtx, one AUTHORS, two filter_*.txt
    # files, total 6 path specifications.
    self.assertEqual(len(matched_path_specs), 6)
def BuildFilterFindSpecs(
        cls, artifact_definitions_path, custom_artifacts_path,
        knowledge_base_object, artifact_filter_names=None,
        filter_file_path=None):
    """Creates find specifications from artifact definitions or a filter file.

    Artifact filter names take precedence over the filter file. When neither
    is provided no find specifications are built.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type or
          None if neither artifact filter names nor a filter file were given.

    Raises:
      InvalidFilter: if a filter was given but no find specifications were
          built from it.
    """
    environment_variables = knowledge_base_object.GetEnvironmentVariables()

    # Nothing to filter on.
    if not artifact_filter_names and not filter_file_path:
        return None

    if artifact_filter_names:
        joined_names = ', '.join(artifact_filter_names)
        logger.debug(
            'building find specification based on artifacts: {0:s}'.format(
                joined_names))

        registry = cls.BuildArtifactsRegistry(
            artifact_definitions_path, custom_artifacts_path)
        filters_helper = artifact_filters.ArtifactDefinitionsFilterHelper(
            registry, artifact_filter_names, knowledge_base_object)
        filters_helper.BuildFindSpecs(
            environment_variables=environment_variables)

        # The find specifications are read back from the knowledge base.
        find_specs_per_type = knowledge_base_object.GetValue(
            filters_helper.KNOWLEDGE_BASE_VALUE)
        find_specs = find_specs_per_type[artifact_types.TYPE_INDICATOR_FILE]

    else:
        logger.debug(
            'building find specification based on filter file: {0:s}'.format(
                filter_file_path))

        file_reader = filter_file.FilterFile(filter_file_path)
        find_specs = file_reader.BuildFindSpecs(
            environment_variables=environment_variables)

    if not find_specs:
        raise errors.InvalidFilter(
            'Error processing filters, no valid specifications built.')

    return find_specs
def testReadFromFileObject(self):
    """Tests the _ReadFromFileObject function.

    The test is skipped when the format test filter file is not available.
    """
    format_test_path = self._GetTestFilePath(
        ['filter_files', 'format_test.txt'])
    self._SkipIfPathNotExists(format_test_path)

    filter_file_reader = filter_file.FilterFile()

    with io.open(format_test_path, 'r', encoding='utf-8') as text_file:
        # Materialize the result while the file is still open.
        parsed_filters = list(
            filter_file_reader._ReadFromFileObject(text_file))

    number_of_filters = len(parsed_filters)
    self.assertEqual(number_of_filters, 1)
def testReadFromFile(self):
    """Tests the ReadFromFile function.

    The test is skipped when the format test filter file is not available.
    """
    format_test_path = self._GetTestFilePath(
        ['filter_files', 'format_test.txt'])
    self._SkipIfPathNotExists(format_test_path)

    filter_file_reader = filter_file.FilterFile()
    parsed_filters = filter_file_reader.ReadFromFile(format_test_path)

    self.assertEqual(len(parsed_filters), 1)

    first_filter = parsed_filters[0]
    self.assertEqual(first_filter.path_separator, '/')
    self.assertEqual(first_filter.paths, ['/usr/bin', '/Windows/System32'])
def _ExtractWithFilter(self, source_path_specs, destination_path, output_writer, filter_file_path, skip_duplicates=True): """Extracts files using a filter expression. This method runs the file extraction process on the image and potentially on every VSS if that is wanted. Args: source_path_specs (list[dfvfs.PathSpec]): path specifications to extract. destination_path (str): path where the extracted files should be stored. output_writer (CLIOutputWriter): output writer. filter_file_path (str): path of the file that contains the filter expressions. skip_duplicates (Optional[bool]): True if files with duplicate content should be skipped. """ for source_path_spec in source_path_specs: file_system, mount_point = self._GetSourceFileSystem( source_path_spec, resolver_context=self._resolver_context) if self._knowledge_base is None: self._Preprocess(file_system, mount_point) display_name = path_helper.PathHelper.GetDisplayNameForPathSpec( source_path_spec) output_writer.Write( 'Extracting file entries from: {0:s}\n'.format(display_name)) environment_variables = self._knowledge_base.GetEnvironmentVariables( ) filter_file_object = filter_file.FilterFile(filter_file_path) find_specs = filter_file_object.BuildFindSpecs( environment_variables=environment_variables) searcher = file_system_searcher.FileSystemSearcher( file_system, mount_point) for path_spec in searcher.Find(find_specs=find_specs): self._ExtractFileEntry(path_spec, destination_path, output_writer, skip_duplicates=skip_duplicates) file_system.Close()
def ExtractEventsFromSources(self):
    """Scans the source, extracts events from it and prints a summary.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    self._source_type = self.ScanSource(self._source_path).source_type

    status_view = self._status_view
    status_view.SetMode(self._status_view_mode)
    status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)
    on_status_update = status_view.GetExtractionStatusUpdateCallback()

    self._output_writer.Write('\n')
    status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
        raise errors.BadConfigOption(
            'Unsupported storage format: {0:s}'.format(self._storage_format))

    # No need to multi process a single file source.
    is_file_source = (
        self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE)
    run_single_process = is_file_source or self._single_process_mode

    if run_single_process:
        extraction_engine = single_process_engine.SingleProcessEngine()
    else:
        extraction_engine = multi_process_engine.TaskMultiProcessEngine(
            use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
        self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    # Find specifications are only built when a filter file is configured.
    collection_find_specs = None
    if configuration.filter_file:
        environment_variables = (
            extraction_engine.knowledge_base.GetEnvironmentVariables())
        filter_file_reader = filter_file.FilterFile(configuration.filter_file)
        collection_find_specs = filter_file_reader.BuildFindSpecs(
            environment_variables=environment_variables)

    if run_single_process:
        logger.debug('Starting extraction in single process mode.')

        processing_status = extraction_engine.ProcessSources(
            self._source_path_specs, storage_writer, self._resolver_context,
            configuration, filter_find_specs=collection_find_specs,
            status_update_callback=on_status_update)

    else:
        logger.debug('Starting extraction in multi process mode.')

        processing_status = extraction_engine.ProcessSources(
            session.identifier, self._source_path_specs, storage_writer,
            configuration,
            enable_sigsegv_handler=self._enable_sigsegv_handler,
            filter_find_specs=collection_find_specs,
            number_of_worker_processes=self._number_of_extraction_workers,
            status_update_callback=on_status_update,
            worker_memory_limit=self._worker_memory_limit)

    status_view.PrintExtractionSummary(processing_status)
def BuildCollectionFilters(
        self, artifact_definitions_path, custom_artifacts_path,
        knowledge_base_object, artifact_filter_names=None,
        filter_file_path=None):
    """Sets up the collection filters helper from artifacts or a filter file.

    Artifact filter names take precedence over the filter file. When neither
    is provided the collection filters helper is left untouched.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Raises:
      InvalidFilter: if no valid file system find specifications are built.
    """
    environment_variables = knowledge_base_object.GetEnvironmentVariables()

    if artifact_filter_names:
        joined_names = ', '.join(artifact_filter_names)
        logger.debug(
            'building find specification based on artifacts: {0:s}'.format(
                joined_names))

        registry = BaseEngine.BuildArtifactsRegistry(
            artifact_definitions_path, custom_artifacts_path)
        helper = artifact_filters.ArtifactDefinitionsFiltersHelper(
            registry, knowledge_base_object)
        self.collection_filters_helper = helper

        helper.BuildFindSpecs(
            artifact_filter_names,
            environment_variables=environment_variables)

        # If the user selected Windows Registry artifacts we have to ensure
        # the Windows Registry files are parsed.
        if helper.registry_find_specs:
            helper.BuildFindSpecs(
                self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
                environment_variables=environment_variables)

        if not helper.included_file_system_find_specs:
            raise errors.InvalidFilter(
                'No valid file system find specifications were built from '
                'artifacts.')

    elif filter_file_path:
        logger.debug(
            'building find specification based on filter file: {0:s}'.format(
                filter_file_path))

        # The file extension selects the filter file reader.
        if filter_file_path.lower().endswith(('.yaml', '.yml')):
            file_reader = yaml_filter_file.YAMLFilterFile()
        else:
            file_reader = filter_file.FilterFile()

        parsed_path_filters = file_reader.ReadFromFile(filter_file_path)

        helper = path_filters.PathCollectionFiltersHelper()
        self.collection_filters_helper = helper

        helper.BuildFindSpecs(
            parsed_path_filters, environment_variables=environment_variables)

        if not (helper.excluded_file_system_find_specs or
                helper.included_file_system_find_specs):
            raise errors.InvalidFilter((
                'No valid file system find specifications were built from filter '
                'file: {0:s}.').format(filter_file_path))
def ExtractEventsFromSources(self):
    """Scans the source, extracts events from it and prints a summary.

    This is a stripped down copy of tools/log2timeline.py that doesn't
    support the full set of flags. The defaults for these are hard coded in
    the constructor of this class.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    source_type = self.ScanSource(self._source_path).source_type

    status_view = self._status_view
    status_view.SetMode(self._status_view_mode)
    status_view.SetSourceInformation(
        self._source_path, source_type, filter_file=self._filter_file)
    on_status_update = status_view.GetExtractionStatusUpdateCallback()

    self._output_writer.Write('\n')
    status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_zip_file.ZIPStorageFileWriter(
        session, self._storage_file_path)

    configuration = self._CreateProcessingConfiguration()

    # No need to multi process a single file source.
    is_file_source = source_type == dfvfs_definitions.SOURCE_TYPE_FILE
    run_single_process = is_file_source or self._single_process_mode

    if run_single_process:
        extraction_engine = single_process_engine.SingleProcessEngine()
    else:
        extraction_engine = multi_process_engine.TaskMultiProcessEngine(
            use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
        self._PreprocessSources(extraction_engine)

    if not configuration.parser_filter_expression:
        # Look up a parser preset based on the operating system values
        # stored in the knowledge base.
        operating_system_values = [
            extraction_engine.knowledge_base.GetValue(value_name)
            for value_name in (
                'operating_system', 'operating_system_product',
                'operating_system_version')]
        preset_expression = self._parsers_manager.GetPresetForOperatingSystem(
            *operating_system_values)

        if preset_expression:
            logging.info('Parser filter expression changed to: {0:s}'.format(
                preset_expression))

        configuration.parser_filter_expression = preset_expression

    enabled_names = self._parsers_manager.GetParserAndPluginNames(
        parser_filter_expression=configuration.parser_filter_expression)
    session.enabled_parser_names = list(enabled_names)
    session.parser_filter_expression = configuration.parser_filter_expression

    # Note session.preferred_time_zone will default to UTC but
    # self._preferred_time_zone is None when not set.
    if self._preferred_time_zone:
        try:
            extraction_engine.knowledge_base.SetTimeZone(
                self._preferred_time_zone)
        except ValueError:
            # pylint: disable=protected-access
            logging.warning(
                'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                    self._preferred_time_zone,
                    extraction_engine.knowledge_base._time_zone.zone))

    # Find specifications are only built when a filter file is configured.
    collection_find_specs = None
    if configuration.filter_file:
        environment_variables = (
            extraction_engine.knowledge_base.GetEnvironmentVariables())
        filter_file_reader = filter_file.FilterFile(configuration.filter_file)
        collection_find_specs = filter_file_reader.BuildFindSpecs(
            environment_variables=environment_variables)

    if run_single_process:
        logging.debug('Starting extraction in single process mode.')

        processing_status = extraction_engine.ProcessSources(
            self._source_path_specs, storage_writer, self._resolver_context,
            configuration, filter_find_specs=collection_find_specs,
            status_update_callback=on_status_update)

    else:
        logging.debug('Starting extraction in multi process mode.')

        processing_status = extraction_engine.ProcessSources(
            session.identifier, self._source_path_specs, storage_writer,
            configuration,
            enable_sigsegv_handler=self._enable_sigsegv_handler,
            filter_find_specs=collection_find_specs,
            number_of_worker_processes=self._number_of_extraction_workers,
            status_update_callback=on_status_update)

    status_view.PrintExtractionSummary(processing_status)
def BuildFilterFindSpecs(
        self, artifact_definitions_path, custom_artifacts_path,
        knowledge_base_object, artifact_filter_names=None,
        filter_file_path=None):
    """Creates find specifications from artifact definitions or a filter file.

    Artifact filter names take precedence over the filter file. When neither
    is provided no find specifications are built.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type or
          None if neither artifact filter names nor a filter file were given.

    Raises:
      InvalidFilter: if a filter was given but no find specifications were
          built from it.
    """
    environment_variables = knowledge_base_object.GetEnvironmentVariables()

    # Nothing to filter on.
    if not artifact_filter_names and not filter_file_path:
        return None

    if artifact_filter_names:
        joined_names = ', '.join(artifact_filter_names)
        logger.debug(
            'building find specification based on artifacts: {0:s}'.format(
                joined_names))

        registry = BaseEngine.BuildArtifactsRegistry(
            artifact_definitions_path, custom_artifacts_path)
        helper = artifact_filters.ArtifactDefinitionsFilterHelper(
            registry, knowledge_base_object)
        self._artifacts_filter_helper = helper

        helper.BuildFindSpecs(
            artifact_filter_names,
            environment_variables=environment_variables)

        # If the user selected Windows Registry artifacts we have to ensure
        # the Windows Registry files are parsed.
        if helper.registry_find_specs:
            helper.BuildFindSpecs(
                self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
                environment_variables=environment_variables)

        find_specs = helper.file_system_find_specs
        if not find_specs:
            raise errors.InvalidFilter(
                'No valid file system find specifications were built from '
                'artifacts.')

        return find_specs

    logger.debug(
        'building find specification based on filter file: {0:s}'.format(
            filter_file_path))

    file_reader = filter_file.FilterFile(filter_file_path)
    find_specs = file_reader.BuildFindSpecs(
        environment_variables=environment_variables)

    if not find_specs:
        raise errors.InvalidFilter(
            'No valid file system find specifications were built from filter '
            'file.')

    return find_specs