def testCalculateNTFSTimeHash(self):
  """Tests the _CalculateNTFSTimeHash function."""
  # Note that the source file is a RAW (VMDK flat) image.
  test_file = self._GetTestFilePath(['multi_partition_image.vmdk'])

  image_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)

  p1_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p1',
      part_index=2, start_offset=0x00010000, parent=image_path_spec)
  p1_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/file1.txt',
      parent=p1_path_spec)

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(
      p1_file_system_path_spec)

  test_extractor = extractors.PathSpecExtractor()

  hash_value = test_extractor._CalculateNTFSTimeHash(file_entry)
  self.assertEqual(hash_value, '6b181becbc9529b73cf3dc35c99a61e7')

  p2_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p2',
      part_index=3, start_offset=0x00510000, parent=image_path_spec)
  p2_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/file2_on_part_2.txt',
      parent=p2_path_spec)

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(
      p2_file_system_path_spec)

  test_extractor = extractors.PathSpecExtractor()

  hash_value = test_extractor._CalculateNTFSTimeHash(file_entry)
  self.assertEqual(hash_value, '8738ed1e707fec64cd1593fd81eb26d2')
def testExtractPathSpecsStorageMediaImage(self):
  """Tests the ExtractPathSpecs function on an image file.

  The image file contains the following files:
  * logs/hidden.zip
  * logs/sys.tgz

  The hidden.zip file contains one file, syslog, which is the same as the
  syslog file inside sys.tgz. The end results should therefore be:
  * logs/hidden.zip (unchanged)
  * logs/hidden.zip:syslog (the text file extracted out)
  * logs/sys.tgz (unchanged)
  * logs/sys.tgz (read as a GZIP file, so not compressed)
  * logs/sys.tgz:syslog.gz (a GZIP file from the TAR container)
  * logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

  This means that the collection script should collect 6 files in total.
  """
  test_file_path = self._GetTestFilePath(['syslog_image.dd'])
  self._SkipIfPathNotExists(test_file_path)

  volume_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=volume_path_spec)

  resolver_context = context.Context()
  test_extractor = extractors.PathSpecExtractor()
  path_specs = list(test_extractor.ExtractPathSpecs(
      [source_path_spec], resolver_context=resolver_context))

  # The path specification extractor itself does not expand archive files;
  # that is done later by the extraction worker. Hence only the file system
  # entries are expected here.
  self.assertEqual(len(path_specs), 3)
def __init__(self, parser_filter_expression=None):
  """Initializes an event extraction worker.

  Args:
    parser_filter_expression (Optional[str]): parser filter expression,
        where None represents all parsers and plugins.

        A parser filter expression is a comma separated value string that
        denotes which parsers and plugins should be used. See
        filters/parser_filter.py for details of the expression syntax.

        This function does not support presets, and requires a parser
        filter expression where presets have been expanded.
  """
  super(EventExtractionWorker, self).__init__()
  self._abort = False
  self._analyzers = []
  self._analyzers_profiler = None
  self._event_extractor = extractors.EventExtractor(
      parser_filter_expression=parser_filter_expression)
  self._hasher_file_size_limit = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._process_archives = None
  self._process_compressed_streams = None
  self._processing_profiler = None

  self.last_activity_timestamp = 0.0
  self.processing_status = definitions.STATUS_INDICATOR_IDLE
def __init__(self, maximum_number_of_tasks=_MAXIMUM_NUMBER_OF_TASKS):
  """Initializes an engine.

  Args:
    maximum_number_of_tasks (Optional[int]): maximum number of concurrent
        tasks, where 0 represents no limit.
  """
  super(TaskMultiProcessEngine, self).__init__()
  self._enable_sigsegv_handler = False
  self._last_worker_number = 0
  self._maximum_number_of_tasks = maximum_number_of_tasks
  self._merge_task = None
  self._merge_task_on_hold = None
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_consumed_warnings = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_produced_warnings = 0
  self._number_of_worker_processes = 0
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._processing_configuration = None
  self._resolver_context = context.Context()
  self._session_identifier = None
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._storage_merge_reader = None
  self._storage_merge_reader_on_hold = None
  self._task_queue = None
  self._task_queue_port = None
  self._task_manager = task_manager.TaskManager()
def _ExtractEventSources(
    self, source_path_specs, storage_writer, filter_find_specs=None,
    resolver_context=None):
  """Processes the sources and extracts event sources.

  Args:
    source_path_specs: a list of path specifications (instances of
                       dfvfs.PathSpec) of the sources to process.
    storage_writer: a storage writer object (instance of StorageWriter).
    filter_find_specs: optional list of filter find specifications
                       (instances of dfvfs.FindSpec).
    resolver_context: optional resolver context (instance of dfvfs.Context).
  """
  path_spec_extractor = extractors.PathSpecExtractor(resolver_context)

  for path_spec in path_spec_extractor.ExtractPathSpecs(
      source_path_specs, find_specs=filter_find_specs):
    # TODO: determine if event sources should be DataStream or FileEntry
    # or both.
    event_source = event_sources.FileEntryEventSource(path_spec=path_spec)
    storage_writer.AddEventSource(event_source)
def __init__(self, parser_filter_expression=None):
  """Initializes an event extraction worker.

  Args:
    parser_filter_expression (Optional[str]): parser filter expression,
        where None represents all parsers and plugins.

        The parser filter expression is a comma separated value string that
        denotes a list of parser names to include and/or exclude. Each entry
        can have the value of:

        * An exact match of a list of parsers, or a preset (see
          plaso/parsers/presets.py for a full list of available presets).
        * A name of a single parser (case insensitive), e.g. msiecf.
        * A glob name for a single parser, e.g. '*msie*' (case insensitive).
  """
  super(EventExtractionWorker, self).__init__()
  self._abort = False
  self._analyzers = []
  self._event_extractor = extractors.EventExtractor(
      parser_filter_expression=parser_filter_expression)
  self._hasher_file_size_limit = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._process_archives = None
  self._process_compressed_streams = None
  self._processing_profiler = None

  self.last_activity_timestamp = 0.0
  self.processing_status = definitions.PROCESSING_STATUS_IDLE
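# Illustrative only: a minimal sketch of constructing the worker with a
# parser filter expression, per the docstring above. The parser name
# 'winreg' is an assumption used for illustration ('msiecf' is the example
# given in the docstring); consult the parser/preset list for valid names.
example_worker = EventExtractionWorker(
    parser_filter_expression='msiecf,winreg')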
def testExtractPathSpecsFileSystem(self):
  """Tests the ExtractPathSpecs function on the file system."""
  test_file_paths = []

  test_file_path = self._GetTestFilePath(['syslog.bz2'])
  self._SkipIfPathNotExists(test_file_path)
  test_file_paths.append(test_file_path)

  test_file_path = self._GetTestFilePath(['syslog.tgz'])
  self._SkipIfPathNotExists(test_file_path)
  test_file_paths.append(test_file_path)

  test_file_path = self._GetTestFilePath(['syslog.zip'])
  self._SkipIfPathNotExists(test_file_path)
  test_file_paths.append(test_file_path)

  test_file_path = self._GetTestFilePath(['wtmp.1'])
  self._SkipIfPathNotExists(test_file_path)
  test_file_paths.append(test_file_path)

  with shared_test_lib.TempDirectory() as temp_directory:
    for a_file in test_file_paths:
      shutil.copy(a_file, temp_directory)

    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=temp_directory)

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor()
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], resolver_context=resolver_context))

    self.assertEqual(len(path_specs), 4)
def __init__(self, input_reader=None, output_writer=None):
  """Initializes the CLI tool object.

  Args:
    input_reader (Optional[InputReader]): input reader, where None indicates
        that the stdin input reader should be used.
    output_writer (Optional[OutputWriter]): output writer, where None
        indicates that the stdout output writer should be used.
  """
  super(ImageExportTool, self).__init__(
      input_reader=input_reader, output_writer=output_writer)
  self._abort = False
  self._artifact_definitions_path = None
  self._artifact_filters = None
  self._artifacts_registry = None
  self._custom_artifacts_path = None
  self._destination_path = None
  self._digests = {}
  self._filter_collection = file_entry_filters.FileEntryFilterCollection()
  self._filter_file = None
  self._knowledge_base = knowledge_base.KnowledgeBase()
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._process_memory_limit = None
  self._resolver_context = context.Context()
  self._skip_duplicates = True

  self.has_filters = False
  self.list_signature_identifiers = False
def __init__(self):
  """Initializes the front-end object."""
  super(ImageExportFrontend, self).__init__()
  self._abort = False
  self._digests = {}
  self._filter_collection = FileEntryFilterCollection()
  self._knowledge_base = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._resolver_context = context.Context()
def __init__(self):
  """Initializes a single process engine."""
  super(SingleProcessEngine, self).__init__()
  self._current_display_name = ''
  self._last_status_update_timestamp = 0.0
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._pid = os.getpid()
  self._process_information = process_info.ProcessInfo(self._pid)
  self._processing_configuration = None
  self._status_update_callback = None
def _ProcessSources(
    self, source_path_specs, storage_writer, filter_find_specs=None):
  """Processes the sources.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
        used in path specification extraction. If set, path specs that match
        the find specification will be processed.
  """
  if self._processing_profiler:
    self._processing_profiler.StartTiming(u'process_sources')

  self._status = definitions.PROCESSING_STATUS_COLLECTING

  self._number_of_consumed_errors = 0
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_produced_errors = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0

  path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)

  for path_spec in path_spec_extractor.ExtractPathSpecs(
      source_path_specs, find_specs=filter_find_specs,
      recurse_file_system=False):
    if self._abort:
      break

    # TODO: determine if event sources should be DataStream or FileEntry
    # or both.
    event_source = event_sources.FileEntryEventSource(path_spec=path_spec)
    storage_writer.AddEventSource(event_source)

    self._number_of_produced_sources = storage_writer.number_of_event_sources

  self._ScheduleTasks(storage_writer)

  if self._abort:
    self._status = definitions.PROCESSING_STATUS_ABORTED
  else:
    self._status = definitions.PROCESSING_STATUS_COMPLETED

  self._number_of_produced_errors = storage_writer.number_of_errors
  self._number_of_produced_events = storage_writer.number_of_events
  self._number_of_produced_sources = storage_writer.number_of_event_sources

  if self._processing_profiler:
    self._processing_profiler.StopTiming(u'process_sources')
def testExtractPathSpecsFileSystemWithFindSpecs(self):
  """Tests the ExtractPathSpecs function with find specifications."""
  test_file_path = self._GetTestFilePath(['System.evtx'])
  self._SkipIfPathNotExists(test_file_path)

  test_file_path = self._GetTestFilePath(['testdir', 'filter_1.txt'])
  self._SkipIfPathNotExists(test_file_path)

  test_file_path = self._GetTestFilePath(['testdir', 'filter_3.txt'])
  self._SkipIfPathNotExists(test_file_path)

  location_expressions = [
      '/test_data/testdir/filter_.+.txt',
      '/test_data/.+evtx',
      '/AUTHORS',
      '/does_not_exist/some_file_[0-9]+txt']

  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location='.')

  resolver_context = context.Context()
  test_extractor = extractors.PathSpecExtractor()
  find_specs = self._GetFindSpecs(location_expressions)
  path_specs = list(test_extractor.ExtractPathSpecs(
      [source_path_spec], find_specs=find_specs,
      resolver_context=resolver_context))

  # Two files with test_data/testdir/filter_*.txt, AUTHORS,
  # test_data/System.evtx and test_data/System2.evtx and
  # a symbolic link test_data/link_to_System.evtx.
  self.assertEqual(len(path_specs), 6)

  paths = self._GetFilePaths(path_specs)

  current_directory = os.getcwd()

  expected_path = os.path.join(
      current_directory, 'test_data', 'testdir', 'filter_1.txt')
  self.assertTrue(expected_path in paths)

  expected_path = os.path.join(
      current_directory, 'test_data', 'testdir', 'filter_2.txt')
  self.assertFalse(expected_path in paths)

  expected_path = os.path.join(
      current_directory, 'test_data', 'testdir', 'filter_3.txt')
  self.assertTrue(expected_path in paths)

  expected_path = os.path.join(current_directory, 'AUTHORS')
  self.assertTrue(expected_path in paths)
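# Illustrative sketch only: one way the _GetFindSpecs test helper could turn
# the location regular expressions above into dfVFS find specifications.
# Using file_system_searcher.FindSpec with a location_regex argument is an
# assumption about the dfVFS API version in use, not the definitive helper
# implementation.
from dfvfs.helpers import file_system_searcher

def _GetFindSpecsSketch(location_expressions):
  """Builds dfVFS find specifications from location regular expressions."""
  find_specs = []
  for location_expression in location_expressions:
    find_spec = file_system_searcher.FindSpec(
        case_sensitive=False, location_regex=location_expression)
    find_specs.append(find_spec)
  return find_specs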
def testExtractPathSpecsFileSystemWithFilter(self):
  """Tests the ExtractPathSpecs function on the file system with a filter."""
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=u'.')

  filter_name = ''
  with tempfile.NamedTemporaryFile(delete=False) as temp_file:
    filter_name = temp_file.name
    temp_file.write('/test_data/testdir/filter_.+.txt\n')
    temp_file.write('/test_data/.+evtx\n')
    temp_file.write('/AUTHORS\n')
    temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')

  resolver_context = context.Context()
  test_extractor = extractors.PathSpecExtractor(resolver_context)

  find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
  path_specs = list(test_extractor.ExtractPathSpecs(
      [source_path_spec], find_specs=find_specs))

  try:
    os.remove(filter_name)
  except (OSError, IOError) as exception:
    logging.warning((
        u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
            filter_name, exception))

  # Two files with test_data/testdir/filter_*.txt, AUTHORS
  # and test_data/System.evtx.
  self.assertEqual(len(path_specs), 4)

  paths = self._GetFilePaths(path_specs)

  current_directory = os.getcwd()

  expected_path = os.path.join(
      current_directory, u'test_data', u'testdir', u'filter_1.txt')
  self.assertTrue(expected_path in paths)

  expected_path = os.path.join(
      current_directory, u'test_data', u'testdir', u'filter_2.txt')
  self.assertFalse(expected_path in paths)

  expected_path = os.path.join(
      current_directory, u'test_data', u'testdir', u'filter_3.txt')
  self.assertTrue(expected_path in paths)

  expected_path = os.path.join(current_directory, u'AUTHORS')
  self.assertTrue(expected_path in paths)
def testExtractPathSpecsStorageMediaImageWithFilter(self):
  """Tests the ExtractPathSpecs function on an image file with a filter."""
  test_file = self._GetTestFilePath([u'ímynd.dd'])
  volume_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=volume_path_spec)

  filter_name = ''
  with tempfile.NamedTemporaryFile(delete=False) as temp_file:
    filter_name = temp_file.name
    temp_file.write('/a_directory/.+zip\n')
    temp_file.write('/a_directory/another.+\n')
    temp_file.write('/passwords.txt\n')

  resolver_context = context.Context()
  test_extractor = extractors.PathSpecExtractor(resolver_context)

  find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
  path_specs = list(test_extractor.ExtractPathSpecs(
      [source_path_spec], find_specs=find_specs))

  try:
    os.remove(filter_name)
  except (OSError, IOError) as exception:
    logging.warning((
        u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
            filter_name, exception))

  self.assertEqual(len(path_specs), 2)

  paths = self._GetFilePaths(path_specs)

  # path_specs[0]
  #   type: TSK
  #   file_path: '/a_directory/another_file'
  #   container_path: 'test_data/ímynd.dd'
  #   image_offset: 0
  self.assertEqual(paths[0], u'/a_directory/another_file')

  # path_specs[1]
  #   type: TSK
  #   file_path: '/passwords.txt'
  #   container_path: 'test_data/ímynd.dd'
  #   image_offset: 0
  self.assertEqual(paths[1], u'/passwords.txt')
def __init__(self):
  """Initializes a single process engine."""
  super(SingleProcessEngine, self).__init__()
  self._current_display_name = ''
  self._extraction_worker = None
  self._file_system_cache = []
  self._number_of_consumed_sources = 0
  self._parsers_counter = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._pid = os.getpid()
  self._process_information = process_info.ProcessInfo(self._pid)
  self._processing_configuration = None
  self._resolver_context = None
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._status_update_active = False
  self._status_update_callback = None
  self._status_update_thread = None
  self._storage_writer = None
def __init__(
    self, maximum_number_of_tasks=_MAXIMUM_NUMBER_OF_TASKS, use_zeromq=True):
  """Initializes an engine object.

  Args:
    maximum_number_of_tasks (Optional[int]): maximum number of concurrent
        tasks, where 0 represents no limit.
    use_zeromq (Optional[bool]): True if ZeroMQ should be used for queuing
        instead of Python's multiprocessing queue.
  """
  super(TaskMultiProcessEngine, self).__init__()
  self._enable_sigsegv_handler = False
  self._filter_find_specs = None
  self._last_worker_number = 0
  self._maximum_number_of_tasks = maximum_number_of_tasks
  self._memory_profiler = None
  self._merge_task = None
  self._merge_task_on_hold = None
  self._number_of_consumed_errors = 0
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_produced_errors = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_worker_processes = 0
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._processing_configuration = None
  self._processing_profiler = None
  self._resolver_context = context.Context()
  self._serializers_profiler = None
  self._session_identifier = None
  self._status = definitions.PROCESSING_STATUS_IDLE
  self._storage_merge_reader = None
  self._storage_merge_reader_on_hold = None
  self._task_queue = None
  self._task_queue_port = None
  self._task_manager = task_manager.TaskManager()
  self._use_zeromq = use_zeromq
def testExtractPathSpecsStorageMediaImageWithFilter(self):
  """Tests the ExtractPathSpecs function on an image file with a filter."""
  location_expressions = [
      '/a_directory/.+zip',
      '/a_directory/another.+',
      '/passwords.txt']

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  volume_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=volume_path_spec)

  resolver_context = context.Context()
  test_extractor = extractors.PathSpecExtractor()
  find_specs = self._GetFindSpecs(location_expressions)
  path_specs = list(test_extractor.ExtractPathSpecs(
      [source_path_spec], find_specs=find_specs,
      resolver_context=resolver_context))

  self.assertEqual(len(path_specs), 2)

  paths = self._GetFilePaths(path_specs)

  # path_specs[0]
  #   path_spec_type: TSK
  #   file_path: '/a_directory/another_file'
  #   container_path: 'test_data/ímynd.dd'
  #   image_offset: 0
  self.assertEqual(paths[0], '/a_directory/another_file')

  # path_specs[1]
  #   path_spec_type: TSK
  #   file_path: '/passwords.txt'
  #   container_path: 'test_data/ímynd.dd'
  #   image_offset: 0
  self.assertEqual(paths[1], '/passwords.txt')
def _Extract(
    self, source_path_specs, destination_path, output_writer,
    skip_duplicates=True):
  """Extracts files.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
    destination_path (str): path where the extracted files should be stored.
    output_writer (CLIOutputWriter): output writer.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.
  """
  output_writer.Write(u'Extracting file entries.\n')
  path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)

  for path_spec in path_spec_extractor.ExtractPathSpecs(source_path_specs):
    self._ExtractFileEntry(
        path_spec, destination_path, output_writer,
        skip_duplicates=skip_duplicates)
def _Extract(
    self, source_path_specs, destination_path, remove_duplicates=True):
  """Extracts files.

  Args:
    source_path_specs: list of path specifications (instances of
                       dfvfs.PathSpec) to process.
    destination_path: the path where the extracted files should be stored.
    remove_duplicates: optional boolean value to indicate if files with
                       duplicate content should be removed. The default
                       is True.
  """
  if not os.path.isdir(destination_path):
    os.makedirs(destination_path)

  path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)

  file_saver = FileSaver(skip_duplicates=remove_duplicates)
  for path_spec in path_spec_extractor.ExtractPathSpecs(source_path_specs):
    self._ExtractFile(file_saver, path_spec, destination_path)
def __init__(
    self, maximum_number_of_tasks=None, number_of_worker_processes=0,
    worker_memory_limit=None, worker_timeout=None):
  """Initializes an engine.

  Args:
    maximum_number_of_tasks (Optional[int]): maximum number of concurrent
        tasks, where 0 represents no limit.
    number_of_worker_processes (Optional[int]): number of worker processes.
    worker_memory_limit (Optional[int]): maximum amount of memory a worker is
        allowed to consume, where None represents the default memory limit
        and 0 represents no limit.
    worker_timeout (Optional[float]): number of minutes before a worker
        process that is not providing status updates is considered inactive,
        where None or 0.0 represents the default timeout.
  """
  if maximum_number_of_tasks is None:
    maximum_number_of_tasks = self._MAXIMUM_NUMBER_OF_TASKS

  if number_of_worker_processes < 1:
    # One worker for each "available" CPU (minus other processes).
    # The number here is derived from the fact that the engine starts up:
    # * A main process.
    #
    # If we want to utilize all CPUs on the system we therefore need to
    # start up workers that amount to the total number of CPUs minus the
    # other processes.
    try:
      cpu_count = multiprocessing.cpu_count() - 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

    except NotImplementedError:
      logger.error((
          'Unable to determine number of CPUs defaulting to {0:d} worker '
          'processes.').format(self._WORKER_PROCESSES_MINIMUM))

      cpu_count = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = cpu_count

  if worker_memory_limit is None:
    worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  if not worker_timeout:
    worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

  super(ExtractionMultiProcessEngine, self).__init__()
  self._enable_sigsegv_handler = False
  self._last_worker_number = 0
  self._maximum_number_of_containers = 50
  self._maximum_number_of_tasks = maximum_number_of_tasks
  self._merge_task = None
  self._merge_task_on_hold = None
  self._number_of_consumed_events = 0
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_extraction_warnings = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_produced_events = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_extraction_warnings = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_worker_processes = number_of_worker_processes
  self._parsers_counter = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._resolver_context = context.Context()
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._storage_merge_reader = None
  self._storage_merge_reader_on_hold = None
  self._task_manager = task_manager.TaskManager()
  self._task_queue = None
  self._task_queue_port = None
  self._task_storage_format = None
  self._worker_memory_limit = worker_memory_limit
  self._worker_timeout = worker_timeout
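# Illustrative only: a standalone sketch of the CPU-based worker heuristic
# used in the __init__ above. The minimum and maximum defaults below are
# assumptions for illustration; the engine itself uses its
# _WORKER_PROCESSES_MINIMUM and _WORKER_PROCESSES_MAXIMUM class constants.
import multiprocessing

def _CalculateDefaultWorkerProcesses(minimum=2, maximum=99):
  """Returns a default worker count: available CPUs minus one reserved for
  the main process, clamped to the [minimum, maximum] range."""
  try:
    cpu_count = multiprocessing.cpu_count() - 1
  except NotImplementedError:
    return minimum
  return max(minimum, min(cpu_count, maximum))

# For example, on an 8-core system this yields 7 worker processes.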
def _ProcessArchiveFile(self, file_entry):
  """Processes an archive file (file that contains file entries).

  Args:
    file_entry: A file entry object (instance of dfvfs.FileEntry).

  Returns:
    A boolean indicating if the file is an archive file.
  """
  try:
    type_indicators = analyzer.Analyzer.GetArchiveTypeIndicators(
        file_entry.path_spec, resolver_context=self._resolver_context)
  except IOError as exception:
    logging.warning((
        u'Analyzer failed to determine archive type indicators '
        u'for file: {0:s} with error: {1:s}').format(
            self._current_display_name, exception))

    # Make sure frame.f_locals does not keep a reference to file_entry.
    file_entry = None
    return False

  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return False

  if number_of_type_indicators > 1:
    logging.debug((
        u'Found multiple format type indicators: {0:s} for '
        u'archive file: {1:s}').format(
            type_indicators, self._current_display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
          parent=file_entry.path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
          parent=file_entry.path_spec)

    else:
      logging.debug((
          u'Unsupported archive format type indicator: {0:s} for '
          u'archive file: {1:s}').format(
              type_indicator, self._current_display_name))

      archive_path_spec = None

    if archive_path_spec and self._process_archive_files:
      try:
        # TODO: make sure to handle the abort here.

        # TODO: change this to pass the archive file path spec to
        # the collector process and have the collector implement a maximum
        # path spec "depth" to prevent ZIP bombs and equiv.
        path_spec_extractor = extractors.PathSpecExtractor(
            self._resolver_context)

        for path_spec in path_spec_extractor.ExtractPathSpecs(
            [archive_path_spec]):
          # TODO: produce event sources to process.
          self._queue.PushItem(path_spec)
          self._produced_number_of_path_specs += 1

      except IOError:
        logging.warning(u'Unable to process archive file:\n{0:s}'.format(
            self._current_display_name))

  # Make sure frame.f_locals does not keep a reference to file_entry.
  file_entry = None

  return True
def testExtractPathSpecsStorageMediaImageWithPartitions(self):
  """Tests the ExtractPathSpecs function on an image file with partitions.

  The image file contains 2 partitions, p1 and p2, both with an NTFS
  file system.
  """
  # Note that the source file is a RAW (VMDK flat) image.
  test_file_path = self._GetTestFilePath(['multi_partition_image.vmdk'])
  self._SkipIfPathNotExists(test_file_path)

  image_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)

  p1_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p1',
      part_index=2, start_offset=0x00010000, parent=image_path_spec)
  p1_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=p1_path_spec)

  p2_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p2',
      part_index=3, start_offset=0x00510000, parent=image_path_spec)
  p2_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=p2_path_spec)

  test_extractor = extractors.PathSpecExtractor()

  resolver_context = context.Context()
  path_specs = list(test_extractor.ExtractPathSpecs(
      [p1_file_system_path_spec, p2_file_system_path_spec],
      resolver_context=resolver_context))

  expected_paths_p1 = [
      '/$AttrDef',
      '/$BadClus',
      '/$BadClus:$Bad',
      '/$Bitmap',
      '/$Boot',
      '/$Extend',
      '/$Extend/$ObjId',
      '/$Extend/$Quota',
      '/$Extend/$Reparse',
      '/$Extend/$RmMetadata',
      '/$Extend/$RmMetadata/$Repair',
      '/$Extend/$RmMetadata/$Repair:$Config',
      '/$Extend/$RmMetadata/$TxfLog',
      '/$LogFile',
      '/$MFT',
      '/$MFTMirr',
      '/$Secure',
      '/$Secure:$SDS',
      '/$UpCase',
      '/$Volume',
      '/file1.txt',
      '/file2.txt']

  expected_paths_p2 = [
      '/$AttrDef',
      '/$BadClus',
      '/$BadClus:$Bad',
      '/$Bitmap',
      '/$Boot',
      '/$Extend',
      '/$Extend/$ObjId',
      '/$Extend/$Quota',
      '/$Extend/$Reparse',
      '/$Extend/$RmMetadata',
      '/$Extend/$RmMetadata/$Repair',
      '/$Extend/$RmMetadata/$Repair:$Config',
      '/$Extend/$RmMetadata/$TxfLog',
      '/$LogFile',
      '/$MFT',
      '/$MFTMirr',
      '/$Secure',
      '/$Secure:$SDS',
      '/$UpCase',
      '/$Volume',
      '/file1_on_part_2.txt',
      '/file2_on_part_2.txt']

  paths = self._GetFilePaths(path_specs)

  expected_paths = expected_paths_p1
  expected_paths.extend(expected_paths_p2)

  self.assertEqual(len(path_specs), len(expected_paths))
  self.assertEqual(sorted(paths), sorted(expected_paths))
def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
  """Processes a data stream containing archive types such as TAR or ZIP.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers
        and other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    type_indicators (list[str]): dfVFS archive type indicators found in
        the data stream.
  """
  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return

  self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

  if number_of_type_indicators > 1:
    display_name = mediator.GetDisplayName()
    logging.debug((
        u'Found multiple format type indicators: {0:s} for '
        u'archive file: {1:s}').format(type_indicators, display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
          parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
          parent=path_spec)

    else:
      archive_path_spec = None

      error_message = (
          u'unsupported archive format type indicator: {0:s}').format(
              type_indicator)
      mediator.ProduceExtractionError(
          error_message, path_spec=path_spec)

    if archive_path_spec:
      try:
        path_spec_extractor = extractors.PathSpecExtractor(
            self._resolver_context)

        for path_spec in path_spec_extractor.ExtractPathSpecs(
            [archive_path_spec]):
          if self._abort:
            break

          event_source = event_sources.FileEntryEventSource(
              path_spec=path_spec)
          event_source.file_entry_type = (
              dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
          mediator.ProduceEventSource(event_source)

          self.last_activity_timestamp = time.time()

      except (IOError, errors.MaximumRecursionDepth) as exception:
        error_message = (
            u'unable to process archive file with error: {0:s}').format(
                exception)
        mediator.ProduceExtractionError(
            error_message, path_spec=path_spec)