Example #1
0
  def __init__(self, collection_queue, storage_queue, parse_error_queue):
    """Initializes the engine object.

    Args:
      collection_queue: the collection queue object (instance of Queue).
      storage_queue: the storage queue object (instance of Queue).
      parse_error_queue: the parser error queue object (instance of Queue).
    """
    # Queues handed in by the caller and the producers wrapping them.
    self._collection_queue = collection_queue
    self._parse_error_queue = parse_error_queue
    self.storage_queue = storage_queue
    self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
    self._parse_error_queue_producer = queue.ItemQueueProducer(
        parse_error_queue)

    # Source related attributes start out unset.
    self._source = None
    self._source_file_entry = None
    self._source_path_spec = None

    # Processing options and their initial values.
    self._enable_debug_output = False
    self._enable_profiling = False
    self._filter_object = None
    self._mount_path = None
    self._process_archive_files = False
    self._profiling_sample_rate = 1000
    self._text_prepend = None

    self.knowledge_base = knowledge_base.KnowledgeBase()
Example #2
0
  def _GetParserContext(
      self, event_queue, parse_error_queue, knowledge_base_values=None):
    """Creates a parser context object that feeds the given queues.

    Args:
      event_queue: the event queue (instance of Queue).
      parse_error_queue: the parse error queue (instance of Queue).
      knowledge_base_values: optional dict containing the knowledge base
                             values. The default is None.

    Returns:
      A parser context object (instance of ParserContext).
    """
    event_producer = queue.EventObjectQueueProducer(event_queue)
    error_producer = queue.ParseErrorQueueProducer(parse_error_queue)

    parser_knowledge_base = knowledge_base.KnowledgeBase()

    # Seed the fresh knowledge base only when values were supplied.
    if knowledge_base_values:
      for key, value in knowledge_base_values.iteritems():
        parser_knowledge_base.SetValue(key, value)

    return context.ParserContext(
        event_producer, error_producer, parser_knowledge_base)
Example #3
0
  def testExtractionWorkerHashing(self):
    """Tests the hasher set up of the extraction worker."""
    collection_queue = single_process.SingleProcessQueue()
    storage_queue = single_process.SingleProcessQueue()
    parse_error_queue = single_process.SingleProcessQueue()

    event_producer = single_process.SingleProcessItemQueueProducer(
        storage_queue)
    error_producer = single_process.SingleProcessItemQueueProducer(
        parse_error_queue)

    parser_mediator = parsers_mediator.ParserMediator(
        event_producer, error_producer, knowledge_base.KnowledgeBase())

    worker_object = worker.BaseEventExtractionWorker(
        0, collection_queue, event_producer, error_producer, parser_mediator,
        resolver_context=context.Context())

    # The worker keeps its hasher names in a protected attribute, which is
    # inspected here to verify the internal state after SetHashers.
    # pylint: disable=protected-access
    worker_object.SetHashers(hasher_names_string=u'md5')
    number_of_hashers = len(worker_object._hasher_names)
    self.assertEqual(1, number_of_hashers)

    worker_object.InitializeParserObjects()
Example #4
0
  def testGetValue(self):
    """Tests the hostname LinuxHostname stores in the knowledge base."""
    knowledge_base_object = knowledge_base.KnowledgeBase()

    plugin = linux.LinuxHostname()
    plugin.Run(self._searcher, knowledge_base_object)

    self.assertEqual(knowledge_base_object.hostname, u'plaso.kiddaland.net')
Example #5
0
    def testGetValue(self):
        """Checks the hostname MacOSXHostname stores in the knowledge base."""
        kb_object = knowledge_base.KnowledgeBase()
        macosx.MacOSXHostname().Run(self._searcher, kb_object)

        expected_hostname = u'Plaso\'s Mac mini'
        self.assertEqual(kb_object.hostname, expected_hostname)
Example #6
0
    def testGetValue(self):
        """Checks the 'sysregistry' value set by WindowsSystemRegistryPath."""
        kb_object = knowledge_base.KnowledgeBase()
        windows.WindowsSystemRegistryPath().Run(self._searcher, kb_object)

        expected_path = u'/Windows/System32/config'
        self.assertEqual(kb_object.GetValue('sysregistry'), expected_path)
Example #7
0
    def testGetValue(self):
        """Checks the 'build' value set by MacOSXBuild."""
        kb_object = knowledge_base.KnowledgeBase()
        macosx.MacOSXBuild().Run(self._searcher, kb_object)

        expected_build = u'10.9.2'
        self.assertEqual(kb_object.GetValue('build'), expected_build)
Example #8
0
    def testGetValue(self):
        """Tests the 'systemroot' value set by WindowsSystemRootPath."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        plugin = windows.WindowsSystemRootPath()
        plugin.Run(self._searcher, knowledge_base_object)

        path = knowledge_base_object.GetValue('systemroot')
        self.assertEqual(path, u'/Windows')
Example #9
0
    def testGetValue(self):
        """Checks the 'time_zone_str' value set by MacOSXTimeZone."""
        kb_object = knowledge_base.KnowledgeBase()
        macosx.MacOSXTimeZone().Run(self._searcher, kb_object)

        expected_time_zone = u'Europe/Amsterdam'
        self.assertEqual(
            kb_object.GetValue('time_zone_str'), expected_time_zone)
Example #10
0
    def testGetValue(self):
        """Checks the 'keyboard_layout' value set by MacOSXKeyboard."""
        kb_object = knowledge_base.KnowledgeBase()
        macosx.MacOSXKeyboard().Run(self._searcher, kb_object)

        expected_layout = u'US'
        self.assertEqual(
            kb_object.GetValue('keyboard_layout'), expected_layout)
Example #11
0
File: preg.py Project: f-s-p/plaso
class PregCache(object):
  """Cache storage used for iPython and other aspects of preg.

  Every value is a class attribute, so it is shared by all code that
  references PregCache.
  """

  # Attributes that have no value until one is assigned.
  hive_storage = None
  shell_helper = None

  # Parser mediator, used when parsing Registry keys.
  parser_mediator = None

  # Created once, when the class body is executed.
  knowledge_base_object = knowledge_base.KnowledgeBase()

  events_from_last_parse = []
Example #12
0
    def testGetValue(self):
        """Checks the codepage WindowsCodepage stores in the knowledge base."""
        kb_object = knowledge_base.KnowledgeBase()

        # WindowsCodepage needs to expand {sysregistry}, hence the
        # WindowsSystemRegistryPath plug-in has to run before it.
        plugin_classes = (
            windows.WindowsSystemRegistryPath, windows.WindowsCodepage)
        for plugin_class in plugin_classes:
            plugin_class().Run(self._searcher, kb_object)

        self.assertEqual(kb_object.codepage, u'cp1252')
Example #13
0
    def testGetValue(self):
        """Tests the hostname WindowsHostname stores in the knowledge base."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsHostname()
        plugin.Run(self._searcher, knowledge_base_object)

        self.assertEqual(knowledge_base_object.hostname, u'WKS-WIN732BITA')
Example #14
0
    def testGetValue(self):
        """Tests the 'programfiles' value set by WindowsProgramFilesPath."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsProgramFilesPath()
        plugin.Run(self._searcher, knowledge_base_object)

        path = knowledge_base_object.GetValue('programfiles')
        self.assertEqual(path, u'Program Files')
Example #15
0
    def testGetValue(self):
        """Tests the 'osversion' value set by WindowsVersion."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsVersion()
        plugin.Run(self._searcher, knowledge_base_object)

        osversion = knowledge_base_object.GetValue('osversion')
        self.assertEqual(osversion, u'Windows 7 Ultimate')
Example #16
0
    def testGetValue(self):
        """Tests the 'time_zone_str' value set by WindowsTimeZone."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsTimeZone()
        plugin.Run(self._searcher, knowledge_base_object)

        time_zone_str = knowledge_base_object.GetValue('time_zone_str')
        self.assertEqual(time_zone_str, u'EST5EDT')
Example #17
0
    def testGetValue(self):
        """Checks the users LinuxUsernames stores in the knowledge base."""
        kb_object = knowledge_base.KnowledgeBase()
        linux.LinuxUsernames().Run(self._searcher, kb_object)

        users = kb_object.GetValue('users')
        self.assertEqual(len(users), 13)

        # Spot check the user at index 11, one key at a time.
        expected_values = (
            ('uid', u'14'),
            ('gid', u'50'),
            ('name', u'ftp'),
            ('path', u'/var/ftp'),
            ('shell', u'/sbin/nologin'))
        for key, expected_value in expected_values:
            self.assertEqual(users[11].get(key, None), expected_value)
Example #18
0
    def _SetUpKnowledgeBase(self, knowledge_base_values=None):
        """Creates a knowledge base, optionally seeded with values.

        Args:
          knowledge_base_values: optional dict containing the knowledge base
                                 values. The default is None.

        Returns:
          A knowledge base object (instance of KnowledgeBase).
        """
        new_knowledge_base = knowledge_base.KnowledgeBase()

        # Nothing to seed: hand back the empty knowledge base.
        if not knowledge_base_values:
            return new_knowledge_base

        for key, value in knowledge_base_values.iteritems():
            new_knowledge_base.SetValue(key, value)
        return new_knowledge_base
Example #19
0
  def __init__(self, collection_queue, storage_queue, parse_error_queue):
    """Initializes the engine object.

    Args:
      collection_queue: the collection queue object (instance of Queue).
      storage_queue: the storage queue object (instance of Queue).
      parse_error_queue: the parser error queue object (instance of Queue).
    """
    # Source related attributes start out unset.
    self._source = None
    self._source_file_entry = None
    self._source_path_spec = None

    # The storage and parse error queues are only reachable through the
    # producers; the collection queue is kept as is.
    self._collection_queue = collection_queue
    self._event_queue_producer = queue.EventObjectQueueProducer(storage_queue)
    self._parse_error_queue_producer = queue.ParseErrorQueueProducer(
        parse_error_queue)

    self.knowledge_base = knowledge_base.KnowledgeBase()
Example #20
0
    def testGetValue(self):
        """Tests the 'programfilesx86' value of WindowsProgramFilesX86Path."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsProgramFilesX86Path()
        plugin.Run(self._searcher, knowledge_base_object)

        path = knowledge_base_object.GetValue('programfilesx86')
        # The test SOFTWARE Registry file does not contain a value for
        # the Program Files X86 path.
        self.assertIsNone(path)
Example #21
0
    def testGetValue(self):
        """Tests the users WindowsUsers stores in the knowledge base."""
        knowledge_base_object = knowledge_base.KnowledgeBase()

        # The plug-in needs to expand {sysregistry} so we need to run
        # the WindowsSystemRegistryPath plug-in first.
        plugin = windows.WindowsSystemRegistryPath()
        plugin.Run(self._searcher, knowledge_base_object)

        plugin = windows.WindowsUsers()
        plugin.Run(self._searcher, knowledge_base_object)

        users = knowledge_base_object.GetValue('users')
        self.assertEqual(len(users), 11)

        # Spot check the user at index 9.
        expected_sid = u'S-1-5-21-2036804247-3058324640-2116585241-1114'
        self.assertEqual(users[9].get('sid', None), expected_sid)
        self.assertEqual(users[9].get('name', None), u'rsydow')
        self.assertEqual(users[9].get('path', None), u'C:\\Users\\rsydow')
Example #22
0
    def testGetValue(self):
        """Checks the single user MacOSXUsers stores in the knowledge base."""
        kb_object = knowledge_base.KnowledgeBase()
        macosx.MacOSXUsers().Run(self._searcher, kb_object)

        users = kb_object.GetValue('users')
        self.assertEqual(len(users), 1)

        # TODO: fix the parsing of the following values to match the behavior on
        # Mac OS X.
        user = users[0]

        # The string -2 is converted into the integer -1.
        self.assertEqual(user.get('uid', None), -1)

        # 'home' is 0 which represents: /var/empty but we convert it
        # into u'<not set>'.
        self.assertEqual(user.get('path', None), u'<not set>')

        # 'name' is 0 which represents: nobody but we convert it into
        # u'<not set>'.
        self.assertEqual(user.get('name', None), u'<not set>')

        # 'realname' is 0 which represents: 'Unprivileged User' but we convert
        # it into u'N/A'.
        self.assertEqual(user.get('realname', None), u'N/A')
Example #23
0
    def _Preprocess(self, searcher):
        """Runs the preprocess plugins and keeps the resulting knowledge base.

        Does nothing when self._knowledge_base is already set.

        Args:
          searcher: The file system searcher object (instance of
                    dfvfs.FileSystemSearcher).
        """
        already_preprocessed = self._knowledge_base is not None
        if already_preprocessed:
            return

        self._knowledge_base = knowledge_base.KnowledgeBase()

        # The guessed operating system selects which plugins are run.
        logging.info(u'Guessing OS')
        guessed_platform = preprocess_interface.GuessOS(searcher)
        os_message = u'OS: {0:s}'.format(guessed_platform)
        logging.info(os_message)

        logging.info(u'Running preprocess.')
        preprocess_manager.PreprocessPluginsManager.RunPlugins(
            guessed_platform, searcher, self._knowledge_base)

        logging.info(u'Preprocess done, saving files from image.')
Example #24
0
    def _GetParserMediator(
            self, event_queue, parse_error_queue, knowledge_base_values=None,
            file_entry=None, parser_chain=None):
        """Creates a parser mediator object that feeds the given queues.

        Args:
          event_queue: the event queue (instance of Queue).
          parse_error_queue: the parse error queue (instance of Queue).
          knowledge_base_values: optional dict containing the knowledge base
                                 values. The default is None.
          file_entry: optional dfVFS file_entry object (instance of
                      dfvfs.FileEntry) being parsed.
          parser_chain: Optional string containing the parsing chain up to
                        this point. The default is None.

        Returns:
          A parser mediator object (instance of ParserMediator).
        """
        event_producer = queue.ItemQueueProducer(event_queue)
        error_producer = queue.ItemQueueProducer(parse_error_queue)

        mediator_knowledge_base = knowledge_base.KnowledgeBase()

        # Seed the fresh knowledge base only when values were supplied.
        if knowledge_base_values:
            for key, value in knowledge_base_values.iteritems():
                mediator_knowledge_base.SetValue(key, value)

        parser_mediator = mediator.ParserMediator(
            event_producer, error_producer, mediator_knowledge_base)

        # The optional settings are only applied for truthy arguments.
        if file_entry:
            parser_mediator.SetFileEntry(file_entry)
        if parser_chain:
            parser_mediator.parser_chain = parser_chain

        return parser_mediator
Example #25
0
File: psort.py Project: f-s-p/plaso
  def ProcessStorage(self, options):
    """Opens the storage file and processes the events within.

    Optionally sets a time slice, starts the requested analysis plugins in
    separate processes, writes the events through the output module and
    finally collects the reports and tags of the analysis plugins.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
               The attributes read here are: slice, timezone,
               analysis_plugins, dedup and quiet. The object is also passed
               on to the output module and to the analysis plugin loader.

    Returns:
      The counter returned by ProcessOutput, extended with a
      u'Stored Events' total, the counters of the analysis plugins and,
      when a filter object is set and u'Limited By' is falsy, a
      u'Filter By Date' value.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
    """
    counter = None

    # Optional time slice: set the time range cache to self._slice_size
    # minutes before and after the requested timestamp.
    slice_option = getattr(options, u'slice', None)
    if slice_option:
      timezone = getattr(options, u'timezone', u'UTC')
      if timezone == u'UTC':
        zone = pytz.utc
      else:
        zone = pytz.timezone(timezone)

      timestamp = timelib.Timestamp.FromTimeString(slice_option, timezone=zone)

      # Convert number of minutes to microseconds.
      range_operator = self._slice_size * 60 * 1000000

      # Set the time range.
      pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
      pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)

    # The storage file is only opened writable when analysis plugins were
    # requested; at this point analysis_plugins is a comma separated string.
    analysis_plugins = getattr(options, u'analysis_plugins', u'')
    if analysis_plugins:
      read_only = False
    else:
      read_only = True

    try:
      storage_file = self.OpenStorageFile(read_only=read_only)
    except IOError as exception:
      raise RuntimeError(
          u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
              self._storage_file_path, exception))

    with storage_file:
      storage_file.SetStoreLimit(self._filter_object)

      # NOTE(review): despite its name output_stream holds the value of
      # self._output_filename here, not necessarily an open stream - confirm
      # the output module accepts both as filehandle.
      if self._output_filename:
        output_stream = self._output_filename
      else:
        output_stream = sys.stdout

      formatter_mediator = self.GetFormatMediator()

      try:
        formatter_mediator.SetPreferredLanguageIdentifier(
            self._preferred_language)
      except (KeyError, TypeError) as exception:
        raise RuntimeError(exception)

      try:
        # TODO: move this into a factory function?
        output_module_class = output_manager.OutputManager.GetOutputClass(
            self._output_format)
        output_module = output_module_class(
            storage_file, formatter_mediator, filehandle=output_stream,
            config=options, filter_use=self._filter_object)

      except IOError as exception:
        raise RuntimeError(
            u'Unable to create output module with error: {0:s}'.format(
                exception))

      if not output_module:
        raise RuntimeError(u'Missing output module.')

      if analysis_plugins:
        logging.info(u'Starting analysis plugins.')
        # Within all preprocessing objects, try to get the last one that has
        # time zone information stored in it, the highest chance of it
        # containing the information we are seeking (defaulting to the last
        # one).
        # NOTE(review): pre_objs[-1] raises IndexError for an empty sequence -
        # confirm the storage always holds a preprocessing object.
        pre_objs = storage_file.GetStorageInformation()
        pre_obj = pre_objs[-1]
        for obj in pre_objs:
          if getattr(obj, u'time_zone_str', u''):
            pre_obj = obj

        # Fill in the collection information.
        pre_obj.collection_information = {}
        # The command line is only recorded when a preferred encoding is
        # known and the command line can be decoded with it.
        encoding = getattr(pre_obj, u'preferred_encoding', None)
        if encoding:
          cmd_line = u' '.join(sys.argv)
          try:
            pre_obj.collection_information[u'cmd_line'] = cmd_line.decode(
                encoding)
          except UnicodeDecodeError:
            pass
        pre_obj.collection_information[u'file_processed'] = (
            self._storage_file_path)
        pre_obj.collection_information[u'method'] = u'Running Analysis Plugins'
        pre_obj.collection_information[u'plugins'] = analysis_plugins
        time_of_run = timelib.Timestamp.GetNow()
        pre_obj.collection_information[u'time_of_run'] = time_of_run

        pre_obj.counter = collections.Counter()

        # Assign the preprocessing object to the storage.
        # This is normally done in the construction of the storage object,
        # however we cannot do that here since the preprocessing object is
        # stored inside the storage file, so we need to open it first to
        # be able to read it in, before we make changes to it. Thus we need
        # to access this protected member of the class.
        # pylint: disable=protected-access
        storage_file._pre_obj = pre_obj

        # Start queues and load up plugins.
        # TODO: add upper queue limit.
        analysis_output_queue = multi_process.MultiProcessingQueue()
        event_queue_producers = []
        event_queues = []
        analysis_plugins_list = [
            name.strip() for name in analysis_plugins.split(u',')]

        # One event queue, with its own producer, per requested plugin name.
        for _ in xrange(0, len(analysis_plugins_list)):
          # TODO: add upper queue limit.
          analysis_plugin_queue = multi_process.MultiProcessingQueue()
          event_queues.append(analysis_plugin_queue)
          event_queue_producers.append(
              queue.ItemQueueProducer(event_queues[-1]))

        knowledge_base_object = knowledge_base.KnowledgeBase()

        # From here on analysis_plugins no longer is the comma separated
        # string but whatever LoadPlugins returns.
        analysis_plugins = analysis.LoadPlugins(
            analysis_plugins_list, event_queues, options)

        # Now we need to start all the plugins.
        for analysis_plugin in analysis_plugins:
          analysis_report_queue_producer = queue.ItemQueueProducer(
              analysis_output_queue)
          analysis_context_object = analysis_context.AnalysisContext(
              analysis_report_queue_producer, knowledge_base_object)
          analysis_process = multiprocessing.Process(
              name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
              target=analysis_plugin.RunPlugin, args=(analysis_context_object,))
          self._analysis_processes.append(analysis_process)

          analysis_process.start()
          logging.info(
              u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name))
      else:
        # Without analysis plugins ProcessOutput gets no analysis queues.
        event_queue_producers = []

      deduplicate_events = getattr(options, u'dedup', True)
      output_buffer = output_interface.EventBuffer(
          output_module, deduplicate_events)
      with output_buffer:
        counter = self.ProcessOutput(
            storage_file, output_buffer, my_filter=self._filter_object,
            filter_buffer=self._filter_buffer,
            analysis_queues=event_queue_producers)

      # Add up the totals of every storage information object that has a
      # counter.
      for information in storage_file.GetStorageInformation():
        if hasattr(information, u'counter'):
          counter[u'Stored Events'] += information.counter[u'total']

      if not getattr(options, u'quiet', False):
        logging.info(u'Output processing is done.')

      # Get all reports and tags from analysis plugins.
      if analysis_plugins:
        logging.info(u'Processing data from analysis plugins.')
        for event_queue_producer in event_queue_producers:
          event_queue_producer.SignalEndOfInput()

        # Wait for all analysis plugins to complete.
        # NOTE(review): join() is given a 10 second timeout and its outcome
        # is not checked, while terminate() is only called on processes that
        # are no longer alive - confirm this is the intended clean up.
        for number, analysis_process in enumerate(self._analysis_processes):
          logging.debug(
              u'Waiting for analysis plugin: {0:d} to complete.'.format(number))
          if analysis_process.is_alive():
            analysis_process.join(10)
          else:
            logging.warning(u'Plugin {0:d} already stopped.'.format(number))
            analysis_process.terminate()
        logging.debug(u'All analysis plugins are now stopped.')

        # Close the output queue.
        analysis_output_queue.SignalEndOfInput()

        # Go over each output.
        analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
            analysis_output_queue, storage_file, self._filter_expression,
            self.preferred_encoding)

        analysis_queue_consumer.ConsumeItems()

        if analysis_queue_consumer.tags:
          storage_file.StoreTagging(analysis_queue_consumer.tags)

        # TODO: analysis_queue_consumer.anomalies:

        # The analysis counter values replace, rather than add to, values
        # already present under the same key.
        for item, value in analysis_queue_consumer.counter.iteritems():
          counter[item] = value

    # Derive the u'Filter By Date' value from the other counter values, but
    # only when a filter object is set and u'Limited By' is falsy.
    if self._filter_object and not counter[u'Limited By']:
      counter[u'Filter By Date'] = (
          counter[u'Stored Events'] - counter[u'Events Included'] -
          counter[u'Events Filtered Out'])

    return counter
Example #26
0
  def testExtractionWorker(self):
    """Tests the extraction worker functionality.

    Pushes path specifications of plain, compressed and archived syslog test
    files through a single worker and checks how many items the storage
    queue holds after each run.
    """
    collection_queue = single_process.SingleProcessQueue()
    storage_queue = single_process.SingleProcessQueue()
    parse_error_queue = single_process.SingleProcessQueue()

    event_queue_producer = single_process.SingleProcessItemQueueProducer(
        storage_queue)
    parse_error_queue_producer = single_process.SingleProcessItemQueueProducer(
        parse_error_queue)

    knowledge_base_object = knowledge_base.KnowledgeBase()

    parser_mediator = parsers_mediator.ParserMediator(
        event_queue_producer, parse_error_queue_producer,
        knowledge_base_object)

    resolver_context = context.Context()

    extraction_worker = worker.BaseEventExtractionWorker(
        0, collection_queue, event_queue_producer, parse_error_queue_producer,
        parser_mediator, resolver_context=resolver_context)

    def _GetOSPathSpec(filename):
      """Returns an OS path specification for the named test file."""
      source_path = self._GetTestFilePath([filename])
      return path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)

    def _GetTarPathSpec(parent_path_spec):
      """Returns a TAR path specification for /syslog within the parent."""
      return path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/syslog',
          parent=parent_path_spec)

    def _ProcessPathSpec(path_spec):
      """Runs the worker on the path specification and counts the items."""
      collection_queue.PushItem(path_spec)
      extraction_worker.Run()

      test_queue_consumer = test_lib.TestQueueConsumer(storage_queue)
      test_queue_consumer.ConsumeItems()
      return test_queue_consumer.number_of_items

    self.assertNotEqual(extraction_worker, None)

    extraction_worker.InitializeParserObjects()

    # Process a file.
    path_spec = _GetOSPathSpec(u'syslog')
    self.assertEqual(_ProcessPathSpec(path_spec), 16)

    # Process a gzip compressed file.
    path_spec = _GetOSPathSpec(u'syslog.gz')
    self.assertEqual(_ProcessPathSpec(path_spec), 16)

    # Process a bzip2 compressed file.
    path_spec = _GetOSPathSpec(u'syslog.bz2')
    self.assertEqual(_ProcessPathSpec(path_spec), 15)

    # Process a file in an archive.
    path_spec = _GetTarPathSpec(_GetOSPathSpec(u'syslog.tar'))
    self.assertEqual(_ProcessPathSpec(path_spec), 13)

    # Process an archive file without "process archive files" mode.
    extraction_worker.SetProcessArchiveFiles(False)

    path_spec = _GetOSPathSpec(u'syslog.tar')
    self.assertEqual(_ProcessPathSpec(path_spec), 3)

    # Process an archive file with "process archive files" mode.
    extraction_worker.SetProcessArchiveFiles(True)

    path_spec = _GetOSPathSpec(u'syslog.tar')
    self.assertEqual(_ProcessPathSpec(path_spec), 16)

    # Process a file in a compressed archive.
    path_spec = _GetOSPathSpec(u'syslog.tgz')
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)
    path_spec = _GetTarPathSpec(path_spec)
    self.assertEqual(_ProcessPathSpec(path_spec), 13)

    # Process a compressed archive file with "process archive files" mode.
    extraction_worker.SetProcessArchiveFiles(True)

    path_spec = _GetOSPathSpec(u'syslog.tgz')
    self.assertEqual(_ProcessPathSpec(path_spec), 17)