Example 1
  def testCalculateNTFSTimeHash(self):
    """Tests the _CalculateNTFSTimeHash function."""
    # Note that the source file is a RAW (VMDK flat) image.
    test_file = self._GetTestFilePath(['multi_partition_image.vmdk'])

    image_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)

    p1_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p1',
        part_index=2, start_offset=0x00010000, parent=image_path_spec)
    p1_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/file1.txt',
        parent=p1_path_spec)

    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        p1_file_system_path_spec)

    test_extractor = extractors.PathSpecExtractor()
    hash_value = test_extractor._CalculateNTFSTimeHash(file_entry)
    self.assertEqual(hash_value, '6b181becbc9529b73cf3dc35c99a61e7')

    p2_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p2',
        part_index=3, start_offset=0x00510000, parent=image_path_spec)
    p2_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/file2_on_part_2.txt',
        parent=p2_path_spec)

    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        p2_file_system_path_spec)

    test_extractor = extractors.PathSpecExtractor()
    hash_value = test_extractor._CalculateNTFSTimeHash(file_entry)
    self.assertEqual(hash_value, '8738ed1e707fec64cd1593fd81eb26d2')
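The expected values above are MD5 hex digests. A minimal sketch of what a
timestamp-based hash such as _CalculateNTFSTimeHash could compute is shown
below; this is an illustrative assumption, not the actual plaso
implementation. The attribute names are those exposed by a dfVFS file entry.

import hashlib

def CalculateNTFSTimeHash(file_entry):
  """Returns an MD5 hex digest of the file entry's NTFS timestamps."""
  timestamp_values = []
  for attribute_name in (
      'creation_time', 'change_time', 'modification_time', 'access_time'):
    date_time = getattr(file_entry, attribute_name, None)
    if date_time is not None:
      timestamp_values.append('{0!s}'.format(date_time))

  return hashlib.md5(''.join(timestamp_values).encode('utf-8')).hexdigest()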
Example 2
  def testExtractPathSpecsStorageMediaImage(self):
    """Tests the ExtractPathSpecs function an image file.

    The image file contains the following files:
    * logs/hidden.zip
    * logs/sys.tgz

    The hidden.zip file contains one file, syslog, which is the same
    file as the one stored in sys.tgz.

    The end results should therefore be:
    * logs/hidden.zip (unchanged)
    * logs/hidden.zip:syslog (the text file extracted out)
    * logs/sys.tgz (unchanged)
    * logs/sys.tgz (read as a GZIP file, so not compressed)
    * logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
    * logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

    This means that the collection script should collect 6 files in total.
    Note, however, that the PathSpecExtractor itself does not expand archive
    files, so the assertion below only covers the path specifications found
    directly on the file system.
    """
    test_file_path = self._GetTestFilePath(['syslog_image.dd'])
    self._SkipIfPathNotExists(test_file_path)

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
        parent=volume_path_spec)

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor()
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], resolver_context=resolver_context))

    self.assertEqual(len(path_specs), 3)
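The layering described in the docstring corresponds to chained dfVFS path
specifications. A minimal sketch, assuming the image and internal paths from
the docstring; the type indicators are real dfVFS constants, the locations
are illustrative:

from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory

# The storage media image on the operating system file system.
os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/syslog_image.dd')

# The ZIP archive on the file system inside the image.
tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location='/logs/hidden.zip',
    parent=os_path_spec)

# The syslog file inside the ZIP archive, one more layer down.
zip_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_ZIP, location='/syslog',
    parent=tsk_path_spec)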
Example 3
  def __init__(self, parser_filter_expression=None):
    """Initializes an event extraction worker.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

          A parser filter expression is a comma separated value string that
          denotes which parsers and plugins should be used. See
          filters/parser_filter.py for details of the expression syntax.

          This function does not support presets, and requires a parser
          filter expression where presets have been expanded.
    """
    super(EventExtractionWorker, self).__init__()
    self._abort = False
    self._analyzers = []
    self._analyzers_profiler = None
    self._event_extractor = extractors.EventExtractor(
        parser_filter_expression=parser_filter_expression)
    self._hasher_file_size_limit = None
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._process_archives = None
    self._process_compressed_streams = None
    self._processing_profiler = None

    self.last_activity_timestamp = 0.0
    self.processing_status = definitions.STATUS_INDICATOR_IDLE
Example 4
  def __init__(
      self, maximum_number_of_tasks=_MAXIMUM_NUMBER_OF_TASKS):
    """Initializes an engine.

    Args:
      maximum_number_of_tasks (Optional[int]): maximum number of concurrent
          tasks, where 0 represents no limit.
    """
    super(TaskMultiProcessEngine, self).__init__()
    self._enable_sigsegv_handler = False
    self._last_worker_number = 0
    self._maximum_number_of_tasks = maximum_number_of_tasks
    self._merge_task = None
    self._merge_task_on_hold = None
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_consumed_warnings = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0
    self._number_of_produced_warnings = 0
    self._number_of_worker_processes = 0
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._processing_configuration = None
    self._resolver_context = context.Context()
    self._session_identifier = None
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._storage_merge_reader = None
    self._storage_merge_reader_on_hold = None
    self._task_queue = None
    self._task_queue_port = None
    self._task_manager = task_manager.TaskManager()
Example 5
  def _ExtractEventSources(
      self, source_path_specs, storage_writer, filter_find_specs=None,
      resolver_context=None):
    """Processes the sources and extracts event sources.

    Args:
      source_path_specs: a list of path specifications (instances of
                         dfvfs.PathSpec) of the sources to process.
      storage_writer: a storage writer object (instance of StorageWriter).
      filter_find_specs: optional list of filter find specifications (instances
                         of dfvfs.FindSpec).
      resolver_context: optional resolver context (instance of dfvfs.Context).
    """
    path_spec_extractor = extractors.PathSpecExtractor(resolver_context)

    for path_spec in path_spec_extractor.ExtractPathSpecs(
        source_path_specs, find_specs=filter_find_specs):

      # TODO: determine if event sources should be DataStream or FileEntry
      # or both.
      event_source = event_sources.FileEntryEventSource(
          path_spec=path_spec)
      storage_writer.AddEventSource(event_source)
Example 6
  def __init__(self, parser_filter_expression=None):
    """Initializes an event extraction worker.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

          The parser filter expression is a comma separated value string that
          denotes a list of parser names to include and/or exclude. Each entry
          can have the value of:

          * An exact match of a list of parsers, or a preset (see
            plaso/parsers/presets.py for a full list of available presets).
          * A name of a single parser (case insensitive), e.g. msiecf.
          * A glob name for a single parser, e.g. '*msie*' (case insensitive).
    """
    super(EventExtractionWorker, self).__init__()
    self._abort = False
    self._analyzers = []
    self._event_extractor = extractors.EventExtractor(
        parser_filter_expression=parser_filter_expression)
    self._hasher_file_size_limit = None
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._process_archives = None
    self._process_compressed_streams = None
    self._processing_profiler = None

    self.last_activity_timestamp = 0.0
    self.processing_status = definitions.PROCESSING_STATUS_IDLE
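For illustration, a few parser filter expression values matching the syntax
described in the docstring. The parser and preset names are examples, and
the '!' prefix for excluding a parser is an assumption about plaso's filter
syntax:

# A single parser by name.
worker = EventExtractionWorker(parser_filter_expression='msiecf')

# All parsers matching a glob.
worker = EventExtractionWorker(parser_filter_expression='*msie*')

# A comma separated value string combining a preset with an exclusion.
worker = EventExtractionWorker(parser_filter_expression='win7,!filestat')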
Example 7
  def testExtractPathSpecsFileSystem(self):
    """Tests the ExtractPathSpecs function on the file system."""
    test_file_paths = []

    test_file_path = self._GetTestFilePath(['syslog.bz2'])
    self._SkipIfPathNotExists(test_file_path)
    test_file_paths.append(test_file_path)

    test_file_path = self._GetTestFilePath(['syslog.tgz'])
    self._SkipIfPathNotExists(test_file_path)
    test_file_paths.append(test_file_path)

    test_file_path = self._GetTestFilePath(['syslog.zip'])
    self._SkipIfPathNotExists(test_file_path)
    test_file_paths.append(test_file_path)

    test_file_path = self._GetTestFilePath(['wtmp.1'])
    self._SkipIfPathNotExists(test_file_path)
    test_file_paths.append(test_file_path)

    with shared_test_lib.TempDirectory() as temp_directory:
      for a_file in test_file_paths:
        shutil.copy(a_file, temp_directory)

      source_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_OS, location=temp_directory)

      resolver_context = context.Context()
      test_extractor = extractors.PathSpecExtractor()
      path_specs = list(test_extractor.ExtractPathSpecs(
          [source_path_spec], resolver_context=resolver_context))

      self.assertEqual(len(path_specs), 4)
Example 8
  def __init__(self, input_reader=None, output_writer=None):
    """Initializes the CLI tool object.

    Args:
      input_reader (Optional[InputReader]): input reader, where None indicates
          that the stdin input reader should be used.
      output_writer (Optional[OutputWriter]): output writer, where None
          indicates that the stdout output writer should be used.
    """
    super(ImageExportTool, self).__init__(
        input_reader=input_reader, output_writer=output_writer)
    self._abort = False
    self._artifact_definitions_path = None
    self._artifact_filters = None
    self._artifacts_registry = None
    self._custom_artifacts_path = None
    self._destination_path = None
    self._digests = {}
    self._filter_collection = file_entry_filters.FileEntryFilterCollection()
    self._filter_file = None
    self._knowledge_base = knowledge_base.KnowledgeBase()
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._process_memory_limit = None
    self._resolver_context = context.Context()
    self._skip_duplicates = True

    self.has_filters = False
    self.list_signature_identifiers = False
Example 9
  def __init__(self):
    """Initializes the front-end object."""
    super(ImageExportFrontend, self).__init__()
    self._abort = False
    self._digests = {}
    self._filter_collection = FileEntryFilterCollection()
    self._knowledge_base = None
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._resolver_context = context.Context()
Example 10
  def __init__(self):
    """Initializes a single process engine."""
    super(SingleProcessEngine, self).__init__()
    self._current_display_name = ''
    self._last_status_update_timestamp = 0.0
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._pid = os.getpid()
    self._process_information = process_info.ProcessInfo(self._pid)
    self._processing_configuration = None
    self._status_update_callback = None
Example 11
  def _ProcessSources(
      self, source_path_specs, storage_writer, filter_find_specs=None):
    """Processes the sources.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction. If set, path specs that match
          the find specification will be processed.
    """
    if self._processing_profiler:
      self._processing_profiler.StartTiming(u'process_sources')

    self._status = definitions.PROCESSING_STATUS_COLLECTING
    self._number_of_consumed_errors = 0
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_produced_errors = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0

    path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)

    for path_spec in path_spec_extractor.ExtractPathSpecs(
        source_path_specs, find_specs=filter_find_specs,
        recurse_file_system=False):
      if self._abort:
        break

      # TODO: determine if event sources should be DataStream or FileEntry
      # or both.
      event_source = event_sources.FileEntryEventSource(path_spec=path_spec)
      storage_writer.AddEventSource(event_source)

      self._number_of_produced_sources = storage_writer.number_of_event_sources

    self._ScheduleTasks(storage_writer)

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    self._number_of_produced_errors = storage_writer.number_of_errors
    self._number_of_produced_events = storage_writer.number_of_events
    self._number_of_produced_sources = storage_writer.number_of_event_sources

    if self._processing_profiler:
      self._processing_profiler.StopTiming(u'process_sources')
Example 12
  def testExtractPathSpecsFileSystemWithFindSpecs(self):
    """Tests the ExtractPathSpecs function with find specifications."""
    test_file_path = self._GetTestFilePath(['System.evtx'])
    self._SkipIfPathNotExists(test_file_path)

    test_file_path = self._GetTestFilePath(['testdir', 'filter_1.txt'])
    self._SkipIfPathNotExists(test_file_path)

    test_file_path = self._GetTestFilePath(['testdir', 'filter_3.txt'])
    self._SkipIfPathNotExists(test_file_path)

    location_expressions = [
        '/test_data/testdir/filter_.+.txt',
        '/test_data/.+evtx',
        '/AUTHORS',
        '/does_not_exist/some_file_[0-9]+txt']

    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location='.')

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor()

    find_specs = self._GetFindSpecs(location_expressions)
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], find_specs=find_specs,
        resolver_context=resolver_context))

    # Two files matching test_data/testdir/filter_*.txt, AUTHORS,
    # test_data/System.evtx and test_data/System2.evtx, and a symbolic link
    # test_data/link_to_System.evtx.
    self.assertEqual(len(path_specs), 6)

    paths = self._GetFilePaths(path_specs)

    current_directory = os.getcwd()

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_1.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_2.txt')
    self.assertFalse(expected_path in paths)

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_3.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(current_directory, 'AUTHORS')
    self.assertTrue(expected_path in paths)
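A plausible sketch of a helper such as _GetFindSpecs, assuming it wraps each
location regular expression in a dfVFS FindSpec; the actual helper in the
plaso test library may differ:

from dfvfs.helpers import file_system_searcher

def _GetFindSpecs(self, location_expressions):
  """Builds dfVFS find specifications from location regular expressions."""
  find_specs = []
  for expression in location_expressions:
    find_spec = file_system_searcher.FindSpec(
        case_sensitive=False, location_regex=expression)
    find_specs.append(find_spec)

  return find_specs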
Example 13
  def testExtractPathSpecsFileSystemWithFilter(self):
    """Tests the ExtractPathSpecs function on the file system with a filter."""
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=u'.')

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/test_data/testdir/filter_.+.txt\n')
      temp_file.write('/test_data/.+evtx\n')
      temp_file.write('/AUTHORS\n')
      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor(resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], find_specs=find_specs))

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    # Two files with test_data/testdir/filter_*.txt, AUTHORS
    # and test_data/System.evtx.
    self.assertEqual(len(path_specs), 4)

    paths = self._GetFilePaths(path_specs)

    current_directory = os.getcwd()

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_1.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_2.txt')
    self.assertFalse(expected_path in paths)

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_3.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(current_directory, u'AUTHORS')
    self.assertTrue(expected_path in paths)
Example 14
  def testExtractPathSpecsStorageMediaImageWithFilter(self):
    """Tests the ExtractPathSpecs function on an image file with a filter."""
    test_file = self._GetTestFilePath([u'ímynd.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/a_directory/.+zip\n')
      temp_file.write('/a_directory/another.+\n')
      temp_file.write('/passwords.txt\n')

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor(resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], find_specs=find_specs))

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    self.assertEqual(len(path_specs), 2)

    paths = self._GetFilePaths(path_specs)

    # path_specs[0]
    # type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEqual(paths[0], u'/a_directory/another_file')

    # path_specs[1]
    # type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEqual(paths[1], u'/passwords.txt')
Example 15
  def __init__(self):
    """Initializes a single process engine."""
    super(SingleProcessEngine, self).__init__()
    self._current_display_name = ''
    self._extraction_worker = None
    self._file_system_cache = []
    self._number_of_consumed_sources = 0
    self._parsers_counter = None
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._pid = os.getpid()
    self._process_information = process_info.ProcessInfo(self._pid)
    self._processing_configuration = None
    self._resolver_context = None
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._status_update_active = False
    self._status_update_callback = None
    self._status_update_thread = None
    self._storage_writer = None
Example 16
  def __init__(
      self, maximum_number_of_tasks=_MAXIMUM_NUMBER_OF_TASKS,
      use_zeromq=True):
    """Initializes an engine object.

    Args:
      maximum_number_of_tasks (Optional[int]): maximum number of concurrent
          tasks, where 0 represents no limit.
      use_zeromq (Optional[bool]): True if ZeroMQ should be used for queuing
          instead of Python's multiprocessing queue.
    """
    super(TaskMultiProcessEngine, self).__init__()
    self._enable_sigsegv_handler = False
    self._filter_find_specs = None
    self._last_worker_number = 0
    self._maximum_number_of_tasks = maximum_number_of_tasks
    self._memory_profiler = None
    self._merge_task = None
    self._merge_task_on_hold = None
    self._number_of_consumed_errors = 0
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_produced_errors = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0
    self._number_of_worker_processes = 0
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._processing_configuration = None
    self._processing_profiler = None
    self._resolver_context = context.Context()
    self._serializers_profiler = None
    self._session_identifier = None
    self._status = definitions.PROCESSING_STATUS_IDLE
    self._storage_merge_reader = None
    self._storage_merge_reader_on_hold = None
    self._task_queue = None
    self._task_queue_port = None
    self._task_manager = task_manager.TaskManager()
    self._use_zeromq = use_zeromq
Example 17
  def testExtractPathSpecsStorageMediaImageWithFilter(self):
    """Tests the ExtractPathSpecs function on an image file with a filter."""
    location_expressions = [
        '/a_directory/.+zip', '/a_directory/another.+', '/passwords.txt']

    test_file_path = self._GetTestFilePath(['ímynd.dd'])
    self._SkipIfPathNotExists(test_file_path)

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
        parent=volume_path_spec)

    resolver_context = context.Context()
    test_extractor = extractors.PathSpecExtractor()

    find_specs = self._GetFindSpecs(location_expressions)
    path_specs = list(test_extractor.ExtractPathSpecs(
        [source_path_spec], find_specs=find_specs,
        resolver_context=resolver_context))

    self.assertEqual(len(path_specs), 2)

    paths = self._GetFilePaths(path_specs)

    # path_specs[0]
    # path_spec_type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEqual(paths[0], '/a_directory/another_file')

    # path_specs[1]
    # path_spec_type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEqual(paths[1], '/passwords.txt')
Example 18
  def _Extract(
      self, source_path_specs, destination_path, output_writer,
      skip_duplicates=True):
    """Extracts files.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
      destination_path (str): path where the extracted files should be stored.
      output_writer (CLIOutputWriter): output writer.
      skip_duplicates (Optional[bool]): True if files with duplicate content
          should be skipped.
    """
    output_writer.Write(u'Extracting file entries.\n')
    path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)

    for path_spec in path_spec_extractor.ExtractPathSpecs(source_path_specs):
      self._ExtractFileEntry(
          path_spec, destination_path, output_writer,
          skip_duplicates=skip_duplicates)
Example 19
  def _Extract(
      self, source_path_specs, destination_path, remove_duplicates=True):
    """Extracts files.

    Args:
      source_path_specs: list of path specifications (instances of
                         dfvfs.PathSpec) to process.
      destination_path: the path where the extracted files should be stored.
      remove_duplicates: optional boolean value to indicate if files with
                         duplicate content should be skipped. The default
                         is True.
    """
    if not os.path.isdir(destination_path):
      os.makedirs(destination_path)

    path_spec_extractor = extractors.PathSpecExtractor(self._resolver_context)
    file_saver = FileSaver(skip_duplicates=remove_duplicates)

    for path_spec in path_spec_extractor.ExtractPathSpecs(source_path_specs):
      self._ExtractFile(file_saver, path_spec, destination_path)
Example 20
  def __init__(
      self, maximum_number_of_tasks=None, number_of_worker_processes=0,
      worker_memory_limit=None, worker_timeout=None):
    """Initializes an engine.

    Args:
      maximum_number_of_tasks (Optional[int]): maximum number of concurrent
          tasks, where 0 represents no limit.
      number_of_worker_processes (Optional[int]): number of worker processes.
      worker_memory_limit (Optional[int]): maximum amount of memory a worker is
          allowed to consume, where None represents the default memory limit
          and 0 represents no limit.
      worker_timeout (Optional[float]): number of minutes before a worker
          process that is not providing status updates is considered inactive,
          where None or 0.0 represents the default timeout.
    """
    if maximum_number_of_tasks is None:
      maximum_number_of_tasks = self._MAXIMUM_NUMBER_OF_TASKS

    if number_of_worker_processes < 1:
      # One worker for each "available" CPU, minus the other processes.
      # The engine itself starts up a main process, so to utilize all CPUs
      # on the system we need to start up as many workers as the total
      # number of CPUs minus the other processes.
      try:
        cpu_count = multiprocessing.cpu_count() - 1

        if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
          cpu_count = self._WORKER_PROCESSES_MINIMUM

        elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
          cpu_count = self._WORKER_PROCESSES_MAXIMUM

      except NotImplementedError:
        logger.error((
            'Unable to determine number of CPUs, defaulting to {0:d} worker '
            'processes.').format(self._WORKER_PROCESSES_MINIMUM))
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      number_of_worker_processes = cpu_count

    if worker_memory_limit is None:
      worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

    if not worker_timeout:
      worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

    super(ExtractionMultiProcessEngine, self).__init__()
    self._enable_sigsegv_handler = False
    self._last_worker_number = 0
    self._maximum_number_of_containers = 50
    self._maximum_number_of_tasks = maximum_number_of_tasks
    self._merge_task = None
    self._merge_task_on_hold = None
    self._number_of_consumed_events = 0
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_extraction_warnings = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_produced_events = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_extraction_warnings = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0
    self._number_of_worker_processes = number_of_worker_processes
    self._parsers_counter = None
    self._path_spec_extractor = extractors.PathSpecExtractor()
    self._resolver_context = context.Context()
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._storage_merge_reader = None
    self._storage_merge_reader_on_hold = None
    self._task_manager = task_manager.TaskManager()
    self._task_queue = None
    self._task_queue_port = None
    self._task_storage_format = None
    self._worker_memory_limit = worker_memory_limit
    self._worker_timeout = worker_timeout
Example 21
  def _ProcessArchiveFile(self, file_entry):
    """Processes an archive file (file that contains file entries).

    Args:
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Returns:
      A boolean indicating if the file is an archive file.
    """
    try:
      type_indicators = analyzer.Analyzer.GetArchiveTypeIndicators(
          file_entry.path_spec, resolver_context=self._resolver_context)
    except IOError as exception:
      logging.warning((
          u'Analyzer failed to determine archive type indicators '
          u'for file: {0:s} with error: {1:s}').format(
              self._current_display_name, exception))

      # Make sure frame.f_locals does not keep a reference to file_entry.
      file_entry = None
      return False

    number_of_type_indicators = len(type_indicators)
    if number_of_type_indicators == 0:
      return False

    if number_of_type_indicators > 1:
      logging.debug((
          u'Found multiple format type indicators: {0:s} for '
          u'archive file: {1:s}').format(
              type_indicators, self._current_display_name))

    for type_indicator in type_indicators:
      if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
            parent=file_entry.path_spec)

      elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
            parent=file_entry.path_spec)

      else:
        logging.debug((
            u'Unsupported archive format type indicator: {0:s} for '
            u'archive file: {1:s}').format(
                type_indicator, self._current_display_name))

        archive_path_spec = None

      if archive_path_spec and self._process_archive_files:
        try:
          # TODO: make sure to handle the abort here.

          # TODO: change this to pass the archive file path spec to
          # the collector process and have the collector implement a maximum
          # path spec "depth" to prevent ZIP bombs and equiv.
          path_spec_extractor = extractors.PathSpecExtractor(
              self._resolver_context)

          for path_spec in path_spec_extractor.ExtractPathSpecs(
              [archive_path_spec]):
            # TODO: produce event sources to process.
            self._queue.PushItem(path_spec)
            self._produced_number_of_path_specs += 1

        except IOError:
          logging.warning(u'Unable to process archive file:\n{0:s}'.format(
              self._current_display_name))

          # Make sure frame.f_locals does not keep a reference to file_entry.
          file_entry = None

    return True
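The TODO above mentions capping the path specification "depth" to guard
against ZIP bombs. A hypothetical sketch of such a guard, counting the
layers of a path specification chain via the dfVFS parent attribute; the
limit and the helper are illustrative and not part of plaso:

_MAXIMUM_PATH_SPEC_DEPTH = 16  # hypothetical limit

def GetPathSpecDepth(path_spec):
  """Counts the number of layers in a path specification chain."""
  depth = 0
  while path_spec is not None:
    depth += 1
    path_spec = getattr(path_spec, 'parent', None)

  return depth

# An archive path spec would then only be expanded while, for example:
#   GetPathSpecDepth(archive_path_spec) < _MAXIMUM_PATH_SPEC_DEPTH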
Example 22
  def testExtractPathSpecsStorageMediaImageWithPartitions(self):
    """Tests the ExtractPathSpecs function on an image file with partitions.

    The image file contains 2 partitions, p1 and p2, both with an NTFS
    file system.
    """
    """
    # Note that the source file is a RAW (VMDK flat) image.
    test_file_path = self._GetTestFilePath(['multi_partition_image.vmdk'])
    self._SkipIfPathNotExists(test_file_path)

    image_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)

    p1_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p1',
        part_index=2, start_offset=0x00010000, parent=image_path_spec)
    p1_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
        parent=p1_path_spec)

    p2_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p2',
        part_index=3, start_offset=0x00510000, parent=image_path_spec)
    p2_file_system_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
        parent=p2_path_spec)

    test_extractor = extractors.PathSpecExtractor()

    resolver_context = context.Context()
    path_specs = list(test_extractor.ExtractPathSpecs(
        [p1_file_system_path_spec, p2_file_system_path_spec],
        resolver_context=resolver_context))

    expected_paths_p1 = [
        '/$AttrDef',
        '/$BadClus',
        '/$BadClus:$Bad',
        '/$Bitmap',
        '/$Boot',
        '/$Extend',
        '/$Extend/$ObjId',
        '/$Extend/$Quota',
        '/$Extend/$Reparse',
        '/$Extend/$RmMetadata',
        '/$Extend/$RmMetadata/$Repair',
        '/$Extend/$RmMetadata/$Repair:$Config',
        '/$Extend/$RmMetadata/$TxfLog',
        '/$LogFile',
        '/$MFT',
        '/$MFTMirr',
        '/$Secure',
        '/$Secure:$SDS',
        '/$UpCase',
        '/$Volume',
        '/file1.txt',
        '/file2.txt']

    expected_paths_p2 = [
        '/$AttrDef',
        '/$BadClus',
        '/$BadClus:$Bad',
        '/$Bitmap',
        '/$Boot',
        '/$Extend',
        '/$Extend/$ObjId',
        '/$Extend/$Quota',
        '/$Extend/$Reparse',
        '/$Extend/$RmMetadata',
        '/$Extend/$RmMetadata/$Repair',
        '/$Extend/$RmMetadata/$Repair:$Config',
        '/$Extend/$RmMetadata/$TxfLog',
        '/$LogFile',
        '/$MFT',
        '/$MFTMirr',
        '/$Secure',
        '/$Secure:$SDS',
        '/$UpCase',
        '/$Volume',
        '/file1_on_part_2.txt',
        '/file2_on_part_2.txt']

    paths = self._GetFilePaths(path_specs)
    expected_paths = expected_paths_p1
    expected_paths.extend(expected_paths_p2)

    self.assertEqual(len(path_specs), len(expected_paths))
    self.assertEqual(sorted(paths), sorted(expected_paths))
Example 23
  def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
    """Processes a data stream containing archive types such as: TAR or ZIP.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
      type_indicators (list[str]): dfVFS archive type indicators found in
          the data stream.
    """
    number_of_type_indicators = len(type_indicators)
    if number_of_type_indicators == 0:
      return

    self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

    if number_of_type_indicators > 1:
      display_name = mediator.GetDisplayName()
      logging.debug((
          u'Found multiple format type indicators: {0:s} for '
          u'archive file: {1:s}').format(type_indicators, display_name))

    for type_indicator in type_indicators:
      if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
            parent=path_spec)

      elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
            parent=path_spec)

      else:
        archive_path_spec = None

        error_message = (
            u'unsupported archive format type indicator: {0:s}').format(
                type_indicator)
        mediator.ProduceExtractionError(
            error_message, path_spec=path_spec)

      if archive_path_spec:
        try:
          path_spec_extractor = extractors.PathSpecExtractor(
              self._resolver_context)

          for path_spec in path_spec_extractor.ExtractPathSpecs(
              [archive_path_spec]):
            if self._abort:
              break

            event_source = event_sources.FileEntryEventSource(
                path_spec=path_spec)
            event_source.file_entry_type = (
                dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
            mediator.ProduceEventSource(event_source)

            self.last_activity_timestamp = time.time()

        except (IOError, errors.MaximumRecursionDepth) as exception:
          error_message = (
              u'unable to process archive file with error: {0:s}').format(
                  exception)
          mediator.ProduceExtractionError(
              error_message, path_spec=path_spec)