Ejemplo n.º 1
0
    def _ParseFile(self, path_segments, parser, knowledge_base_object):
        """Parses a file using the parser.

    Args:
      path_segments (list[str]): path segments inside the test data directory.
      parser (BaseParser): parser.
      knowledge_base_object (KnowledgeBase): knowledge base.

    Returns:
      FakeStorageWriter: storage writer.
    """
        session = sessions.Session()
        storage_writer = fake_writer.FakeStorageWriter(session)
        storage_writer.Open()

        mediator = parsers_mediator.ParserMediator(storage_writer,
                                                   knowledge_base_object)

        file_entry = self._GetTestFileEntry(path_segments)
        mediator.SetFileEntry(file_entry)

        if isinstance(parser, parsers_interface.FileEntryParser):
            parser.Parse(mediator)

        elif isinstance(parser, parsers_interface.FileObjectParser):
            file_object = file_entry.GetFileObject()
            try:
                parser.Parse(mediator, file_object)
            finally:
                # Close the file object even when parsing fails.
                file_object.close()

        else:
            # Use the !s conversion: type objects do not support the 's'
            # presentation type, so '{0:s}'.format(type(parser)) would raise
            # TypeError instead of reporting the failure.
            self.fail('Got unexpected parser type: {0!s}'.format(type(parser)))

        return storage_writer
Ejemplo n.º 2
0
    def testProduceEventWithEventData(self):
        """Tests the ProduceEventWithEventData method."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        test_storage_writer.Open()

        event_data_stream = events.EventDataStream()
        test_mediator.ProduceEventDataStream(event_data_stream)

        timestamp = fake_time.FakeTime()
        timestamped_event = time_events.DateTimeValuesEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        timestamped_event.parser = 'test_parser'
        event_data = events.EventData()
        event_data.parser = 'test_parser'

        # An event that carries a timestamp is accepted and stored.
        test_mediator.ProduceEventWithEventData(timestamped_event, event_data)
        self.assertEqual(test_storage_writer.number_of_warnings, 0)
        self.assertEqual(test_storage_writer.number_of_events, 1)

        # An event without a timestamp is rejected with InvalidEvent.
        untimestamped_event = events.EventObject()
        untimestamped_event.parser = 'test_parser'
        with self.assertRaises(errors.InvalidEvent):
            test_mediator.ProduceEventWithEventData(
                untimestamped_event, event_data)
Ejemplo n.º 3
0
    def testExtractionWorkerHashing(self):
        """Test that the worker sets up and runs hashing code correctly."""
        input_queue = single_process.SingleProcessQueue()
        output_queue = single_process.SingleProcessQueue()
        error_queue = single_process.SingleProcessQueue()
        output_queue_producer = single_process.SingleProcessItemQueueProducer(
            output_queue)
        error_queue_producer = single_process.SingleProcessItemQueueProducer(
            error_queue)

        knowledge_base_object = knowledge_base.KnowledgeBase()

        parser_mediator = parsers_mediator.ParserMediator(
            output_queue_producer, error_queue_producer, knowledge_base_object)

        resolver_context = context.Context()

        extraction_worker = worker.BaseEventExtractionWorker(
            0, input_queue, output_queue_producer, error_queue_producer,
            parser_mediator, resolver_context=resolver_context)

        # Verify the worker recorded the requested hasher internally.
        # pylint: disable=protected-access
        extraction_worker.SetHashers(hasher_names_string=u'md5')
        self.assertEqual(1, len(extraction_worker._hasher_names))

        extraction_worker.InitializeParserObjects()
Ejemplo n.º 4
0
    def _CreateParserMediator(self,
                              storage_writer,
                              knowledge_base_object,
                              file_entry=None,
                              parser_chain=None):
        """Creates a parser mediator.

    Args:
      storage_writer (StorageWriter): storage writer.
      knowledge_base_object (KnowledgeBase): knowledge base.
      file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
      parser_chain (Optional[str]): parsing chain up to this point.

    Returns:
      ParserMediator: parser mediator.
    """
        mediator_object = parsers_mediator.ParserMediator(
            storage_writer, knowledge_base_object)

        # Apply the optional initial state to the new mediator.
        if file_entry:
            mediator_object.SetFileEntry(file_entry)
        if parser_chain:
            mediator_object.parser_chain = parser_chain

        return mediator_object
Ejemplo n.º 5
0
  def _ParseFile(self, parser_object, path, knowledge_base_object):
    """Parses a file using the parser object.

    Args:
      parser_object: the parser object.
      path: the path of the file to parse.
      knowledge_base_object: the knowledge base object (instance of
                             KnowledgeBase).

    Returns:
      An event object queue object (instance of Queue).
    """
    output_event_queue = single_process.SingleProcessQueue()
    output_queue_producer = plaso_queue.ItemQueueProducer(output_event_queue)

    error_queue = single_process.SingleProcessQueue()

    parser_mediator = parsers_mediator.ParserMediator(
        output_queue_producer, error_queue, knowledge_base_object)

    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=path)
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(os_path_spec)
    parser_mediator.SetFileEntry(file_entry)

    file_object = file_entry.GetFileObject()
    try:
      parser_object.Parse(parser_mediator, file_object)
    finally:
      # Close the file object even when parsing fails.
      file_object.close()

    return output_event_queue
Ejemplo n.º 6
0
    def _TestProcessPathSpec(self,
                             storage_writer,
                             path_spec,
                             expected_event_counters,
                             extraction_worker=None,
                             knowledge_base_values=None,
                             process_archives=False):
        """Tests processing a path specification.

    Args:
      storage_writer (StorageWriter): storage writer.
      path_spec (dfvfs.PathSpec): path specification.
      expected_event_counters (dict[str, int|list[int]]): expected event
          counters per event data type.
      extraction_worker (Optional[EventExtractorWorker]): worker to process the
          path specification. If None, a new worker will be created.
      knowledge_base_values (Optional[dict]): knowledge base values.
      process_archives (Optional[bool]): whether archive files should be
          processed.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        for identifier, value in (knowledge_base_values or {}).items():
            knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            storage_writer, knowledge_base_object,
            resolver_context=resolver_context)

        if not extraction_worker:
            configuration = configurations.ExtractionConfiguration()
            configuration.process_archives = process_archives

            extraction_worker = worker.EventExtractionWorker()
            extraction_worker.SetExtractionConfiguration(configuration)

        storage_writer.Open()

        try:
            storage_writer.WriteSessionStart()

            # Process the initial path specification, then keep processing
            # every event source it produced until none remain.
            extraction_worker.ProcessPathSpec(parser_mediator, path_spec)
            event_source = storage_writer.GetFirstWrittenEventSource()
            while event_source:
                extraction_worker.ProcessPathSpec(
                    parser_mediator, event_source.path_spec)
                event_source = storage_writer.GetNextWrittenEventSource()

            storage_writer.WriteSessionCompletion()

            if expected_event_counters:
                self.CheckEventCounters(
                    storage_writer, expected_event_counters)

        finally:
            storage_writer.Close()
Ejemplo n.º 7
0
  def testGetDisplayName(self):
    """Tests the GetDisplayName function."""
    knowledge_base_object = knowledge_base.KnowledgeBase()
    parser_mediator = mediator.ParserMediator(knowledge_base_object)

    storage_writer = fake_writer.FakeStorageWriter()
    parser_mediator.SetStorageWriter(storage_writer)

    # Requesting a display name without a file entry raises ValueError.
    with self.assertRaises(ValueError):
      parser_mediator.GetDisplayName(file_entry=None)

    test_file_path = self._GetTestFilePath(['syslog.gz'])
    self._SkipIfPathNotExists(test_file_path)

    # An OS path specification yields an 'OS:<path>' display name.
    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(os_path_spec)

    display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

    expected_display_name = 'OS:{0:s}'.format(test_file_path)
    self.assertEqual(display_name, expected_display_name)

    # A GZIP path specification layered on the OS path specification yields
    # a 'GZIP:<path>' display name.
    gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec)
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(gzip_path_spec)

    display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

    expected_display_name = 'GZIP:{0:s}'.format(test_file_path)
    self.assertEqual(display_name, expected_display_name)

    test_file_path = self._GetTestFilePath(['vsstest.qcow2'])
    self._SkipIfPathNotExists(test_file_path)

    # Build an OS > QCOW > VSHADOW (store 2) > TSK path specification chain.
    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
    qcow_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_QCOW, parent=os_path_spec)
    vshadow_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location='/vss2',
        store_index=1, parent=qcow_path_spec)
    tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, inode=35, location='/syslog.gz',
        parent=vshadow_path_spec)

    file_entry = path_spec_resolver.Resolver.OpenFileEntry(tsk_path_spec)

    # The VSS store and TSK location dominate the display name.
    display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

    expected_display_name = 'VSS2:TSK:/syslog.gz'
    self.assertEqual(display_name, expected_display_name)

    # A configured text prepend is inserted before the location.
    parser_mediator.SetTextPrepend('C:')

    display_name = parser_mediator.GetDisplayName(file_entry=file_entry)
    expected_display_name = 'VSS2:TSK:C:/syslog.gz'
    self.assertEqual(display_name, expected_display_name)
Ejemplo n.º 8
0
    def testResetFileEntry(self):
        """Tests the ResetFileEntry function."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        # Resetting when no file entry was set should not raise.
        test_mediator.ResetFileEntry()
Ejemplo n.º 9
0
    def testSetFileEntry(self):
        """Tests the SetFileEntry function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        # Clearing the file entry with None should be accepted.
        test_mediator.SetFileEntry(None)
Ejemplo n.º 10
0
    def testSignalAbort(self):
        """Tests the SignalAbort function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        # Signalling an abort should not raise.
        test_mediator.SignalAbort()
Ejemplo n.º 11
0
    def testSetStorageWriter(self):
        """Tests the SetStorageWriter function."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        # Unsetting the storage writer with None should be accepted.
        test_mediator.SetStorageWriter(None)
Ejemplo n.º 12
0
    def testGetLatestYearFromFileEntry(self):
        """Tests the _GetLatestYearFromFileEntry function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        # Without a file entry there is no year to derive.
        self.assertIsNone(test_mediator._GetLatestYearFromFileEntry())
Ejemplo n.º 13
0
    def testGetFilename(self):
        """Tests the GetFilename function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        # Without a file entry there is no filename.
        self.assertIsNone(test_mediator.GetFilename())
Ejemplo n.º 14
0
    def testGetFileEntry(self):
        """Tests the GetFileEntry function."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        # No file entry has been set yet, so None is expected.
        self.assertIsNone(test_mediator.GetFileEntry())
Ejemplo n.º 15
0
    def testGetEarliestYearFromFileEntry(self):
        """Tests the _GetEarliestYearFromFileEntry function."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        # Without a file entry there is no year to derive.
        self.assertIsNone(test_mediator._GetEarliestYearFromFileEntry())
Ejemplo n.º 16
0
    def _CreateExtractionWorker(self,
                                worker_number,
                                filter_object=None,
                                mount_path=None,
                                process_archive_files=False,
                                text_prepend=None):
        """Creates an extraction worker object.

    Args:
      worker_number: a number that identifies the worker.
      filter_object: optional filter object (instance of objectfilter.Filter).
      mount_path: optional string containing the mount path.
      process_archive_files: optional boolean value to indicate if the worker
                             should scan for file entries inside files.
      text_prepend: optional string that contains the text to prepend to every
                    event object.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
        mediator_object = parsers_mediator.ParserMediator(
            self._event_queue_producer, self._parse_error_queue_producer,
            self.knowledge_base)

        resolver_context = context.Context()

        extraction_worker = SingleProcessEventExtractionWorker(
            worker_number, self._path_spec_queue, self._event_queue_producer,
            self._parse_error_queue_producer, mediator_object,
            resolver_context=resolver_context,
            status_update_callback=self._UpdateStatus)

        # TODO: differentiate between debug output and debug mode.
        extraction_worker.SetEnableDebugMode(self._enable_debug_output)

        extraction_worker.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        extraction_worker.SetProcessArchiveFiles(process_archive_files)

        # Only apply the optional settings that were provided.
        if filter_object:
            extraction_worker.SetFilterObject(filter_object)
        if mount_path:
            extraction_worker.SetMountPath(mount_path)
        if text_prepend:
            extraction_worker.SetTextPrepend(text_prepend)

        return extraction_worker
Ejemplo n.º 17
0
    def testGetLatestYear(self):
        """Tests the GetLatestYear function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        # Without other information the latest year falls back to the
        # current year.
        expected_year = test_mediator.GetCurrentYear()
        self.assertEqual(test_mediator.GetLatestYear(), expected_year)
Ejemplo n.º 18
0
    def testGetEstimatedYear(self):
        """Tests the GetEstimatedYear function."""
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(
            test_storage_writer, test_knowledge_base)

        # Without other information the estimated year falls back to the
        # current year.
        expected_year = test_mediator.GetCurrentYear()
        self.assertEqual(test_mediator.GetEstimatedYear(), expected_year)
Ejemplo n.º 19
0
    def testProduceRecoveryWarning(self):
        """Tests the ProduceRecoveryWarning method."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        test_storage_writer.Open()

        # A recovery warning must be recorded as a recovery warning only,
        # not as an event or an extraction warning.
        test_mediator.ProduceRecoveryWarning('test')
        self.assertEqual(test_storage_writer.number_of_events, 0)
        self.assertEqual(test_storage_writer.number_of_extraction_warnings, 0)
        self.assertEqual(test_storage_writer.number_of_recovery_warnings, 1)
Ejemplo n.º 20
0
    def testAnalyzeFileObject(self):
        """Tests the _AnalyzeFileObject function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter(session)

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            preferred_year=2016,
            resolver_context=resolver_context)

        extraction_worker = worker.EventExtractionWorker()

        # The test analyzer starts without any results.
        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        # Inject the test analyzer into the worker's internal analyzer list.
        # pylint: disable=protected-access
        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        file_entry = self._GetTestFileEntry(['ímynd.dd'])
        mediator.SetFileEntry(file_entry)

        file_object = file_entry.GetFileObject()
        display_name = mediator.GetDisplayName()
        event_data_stream = events.EventDataStream()

        try:
            extraction_worker._AnalyzeFileObject(file_object, display_name,
                                                 event_data_stream)
        finally:
            # Close the file object even when the analysis fails.
            file_object.close()

        storage_writer.WriteSessionCompletion()
        storage_writer.Close()

        self.assertIsNotNone(event_data_stream)

        # The analysis is expected to have set the 'test_result' attribute
        # on the event data stream.
        event_attribute = getattr(event_data_stream, 'test_result', None)
        self.assertEqual(event_attribute, 'is_vegetable')
Ejemplo n.º 21
0
    def testAnalyzeDataStream(self):
        """Tests the _AnalyzeDataStream function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter()

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            knowledge_base_object, resolver_context=resolver_context)
        parser_mediator.SetPreferredYear(2016)
        parser_mediator.SetStorageWriter(storage_writer)

        extraction_worker = worker.EventExtractionWorker()

        # The test analyzer starts without any results.
        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        # Inject the test analyzer into the worker's internal analyzer list.
        # pylint: disable=protected-access
        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()

        session_start = session.CreateSessionStart()
        storage_writer.AddAttributeContainer(session_start)

        file_entry = self._GetTestFileEntry(['syslog.tgz'])
        parser_mediator.SetFileEntry(file_entry)

        display_name = parser_mediator.GetDisplayName()
        event_data_stream = events.EventDataStream()

        # Analyze the default (unnamed) data stream of the file entry.
        extraction_worker._AnalyzeDataStream(file_entry, '', display_name,
                                             event_data_stream)

        session_completion = session.CreateSessionCompletion()
        storage_writer.AddAttributeContainer(session_completion)

        storage_writer.Close()

        self.assertIsNotNone(event_data_stream)

        # The analysis is expected to have set the 'test_result' attribute
        # on the event data stream.
        event_attribute = getattr(event_data_stream, 'test_result', None)
        self.assertEqual(event_attribute, 'is_vegetable')
Ejemplo n.º 22
0
    def _CreateParserMediator(self,
                              session,
                              storage_writer,
                              collection_filters_helper=None,
                              file_entry=None,
                              knowledge_base_values=None,
                              parser_chain=None,
                              timezone='UTC'):
        """Creates a parser mediator.

    Args:
      session (Session): session.
      storage_writer (StorageWriter): storage writer.
      collection_filters_helper (Optional[CollectionFiltersHelper]): collection
          filters helper.
      file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
      knowledge_base_values (Optional[dict]): knowledge base values.
      parser_chain (Optional[str]): parsing chain up to this point.
      timezone (Optional[str]): timezone.

    Returns:
      ParserMediator: parser mediator.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        for identifier, value in (knowledge_base_values or {}).items():
            # The codepage has a dedicated setter on the knowledge base.
            if identifier == 'codepage':
                knowledge_base_object.SetCodepage(value)
            else:
                knowledge_base_object.SetValue(identifier, value)

        knowledge_base_object.SetTimeZone(timezone)

        parser_mediator = parsers_mediator.ParserMediator(
            session, storage_writer, knowledge_base_object,
            collection_filters_helper=collection_filters_helper)

        # Apply the optional initial state to the new mediator.
        if file_entry:
            parser_mediator.SetFileEntry(file_entry)
        if parser_chain:
            parser_mediator.parser_chain = parser_chain

        return parser_mediator
Ejemplo n.º 23
0
    def testGetDisplayNameForPathSpec(self):
        """Tests the GetDisplayNameForPathSpec function."""
        test_knowledge_base = knowledge_base.KnowledgeBase()
        test_mediator = mediator.ParserMediator(test_knowledge_base)

        test_storage_writer = fake_writer.FakeStorageWriter()
        test_mediator.SetStorageWriter(test_storage_writer)

        test_file_path = self._GetTestFilePath(['syslog.gz'])
        self._SkipIfPathNotExists(test_file_path)

        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)

        # An OS path specification yields an 'OS:<path>' display name.
        display_name = test_mediator.GetDisplayNameForPathSpec(os_path_spec)
        self.assertEqual(display_name, 'OS:{0:s}'.format(test_file_path))
Ejemplo n.º 24
0
    def CreateExtractionWorker(self, worker_number):
        """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
        mediator_object = parsers_mediator.ParserMediator(
            self._event_queue_producer, self._parse_error_queue_producer,
            self.knowledge_base)

        resolver_context = context.Context()

        extraction_worker = SingleProcessEventExtractionWorker(
            worker_number, self._collection_queue, self._event_queue_producer,
            self._parse_error_queue_producer, mediator_object,
            resolver_context=resolver_context)

        extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

        # TODO: move profiler in separate object.
        extraction_worker.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate)

        # Only apply the optional settings that were configured.
        if self._process_archive_files:
            extraction_worker.SetProcessArchiveFiles(
                self._process_archive_files)
        if self._filter_object:
            extraction_worker.SetFilterObject(self._filter_object)
        if self._mount_path:
            extraction_worker.SetMountPath(self._mount_path)
        if self._text_prepend:
            extraction_worker.SetTextPrepend(self._text_prepend)

        return extraction_worker
Ejemplo n.º 25
0
    def CreateParserMediator(self, event_queue=None):
        """Create a parser mediator object.

    Args:
      event_queue: an optional event queue object (instance of Queue).

    Returns:
      A parser mediator object (instance of parsers_mediator.ParserMediator).
    """
        # Fall back to a fresh single-process queue when none was provided.
        if event_queue is None:
            event_queue = single_process.SingleProcessQueue()
        event_producer = queue.ItemQueueProducer(event_queue)

        error_queue = single_process.SingleProcessQueue()
        error_producer = queue.ItemQueueProducer(error_queue)

        return parsers_mediator.ParserMediator(
            event_producer, error_producer, self.knowledge_base_object)
Ejemplo n.º 26
0
    def _CreateParserMediator(self,
                              storage_writer,
                              artifacts_filter_helper=None,
                              file_entry=None,
                              knowledge_base_values=None,
                              parser_chain=None,
                              timezone='UTC'):
        """Creates a parser mediator.

    Args:
      storage_writer (StorageWriter): storage writer.
      artifacts_filter_helper (Optional[ArtifactDefinitionsFilterHelper]):
          artifacts definitions filter helper.
      file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
      knowledge_base_values (Optional[dict]): knowledge base values.
      parser_chain (Optional[str]): parsing chain up to this point.
      timezone (Optional[str]): timezone.

    Returns:
      ParserMediator: parser mediator.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            # dict.items() is directly iterable; the previous iter() wrapper
            # was redundant.
            for identifier, value in knowledge_base_values.items():
                if identifier == 'codepage':
                    # The codepage has a dedicated setter on the knowledge
                    # base.
                    knowledge_base_object.SetCodepage(value)
                else:
                    knowledge_base_object.SetValue(identifier, value)

        knowledge_base_object.SetTimeZone(timezone)

        parser_mediator = mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            artifacts_filter_helper=artifacts_filter_helper)

        if file_entry:
            parser_mediator.SetFileEntry(file_entry)

        if parser_chain:
            parser_mediator.parser_chain = parser_chain

        return parser_mediator
Ejemplo n.º 27
0
    def testGetCompressedStreamTypes(self):
        """Tests the _GetCompressedStreamTypes function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter()

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            knowledge_base_object, resolver_context=resolver_context)
        parser_mediator.SetPreferredYear(2016)
        parser_mediator.SetStorageWriter(storage_writer)

        storage_writer.Open()

        session_start = session.CreateSessionStart()
        storage_writer.AddAttributeContainer(session_start)

        # Note: previously a first worker (with a test analyzer attached) was
        # created and then immediately replaced by a second bare worker,
        # discarding the analyzer setup; a single worker is sufficient here.
        extraction_worker = worker.EventExtractionWorker()

        path_spec = self._GetTestFilePathSpec(['syslog.tgz'])

        # A .tgz file is detected as a GZIP compressed stream.
        # pylint: disable=protected-access
        type_indicators = extraction_worker._GetCompressedStreamTypes(
            parser_mediator, path_spec)
        self.assertEqual(type_indicators,
                         [dfvfs_definitions.TYPE_INDICATOR_GZIP])

        session_completion = session.CreateSessionCompletion()
        storage_writer.AddAttributeContainer(session_completion)

        storage_writer.Close()
Ejemplo n.º 28
0
  def CreateExtractionWorker(self, worker_number):
    """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
    mediator_object = parsers_mediator.ParserMediator(
        self._event_queue_producer, self._parse_error_queue_producer,
        self.knowledge_base)

    # A resolver context per process prevents multi processing issues with
    # file objects stored in images.
    resolver_context = context.Context()

    extraction_worker = worker.BaseEventExtractionWorker(
        worker_number,
        self._collection_queue,
        self._event_queue_producer,
        self._parse_error_queue_producer,
        mediator_object,
        resolver_context=resolver_context)

    extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

    extraction_worker.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate,
        profiling_type=self._profiling_type)

    # Only apply the optional settings that were configured.
    if self._process_archive_files:
      extraction_worker.SetProcessArchiveFiles(self._process_archive_files)
    if self._filter_object:
      extraction_worker.SetFilterObject(self._filter_object)
    if self._mount_path:
      extraction_worker.SetMountPath(self._mount_path)
    if self._text_prepend:
      extraction_worker.SetTextPrepend(self._text_prepend)

    return extraction_worker
Ejemplo n.º 29
0
    def _TestProcessPathSpec(self,
                             storage_writer,
                             path_spec,
                             extraction_worker=None,
                             knowledge_base_values=None,
                             process_archives=False):
        """Tests processing a path specification.

    Args:
      storage_writer (StorageWriter): storage writer.
      path_spec (dfvfs.PathSpec): path specification.
      extraction_worker (Optional[EventExtractorWorker]): worker to process the
          pathspec. If None, a new worker will be created.
      knowledge_base_values (Optional[dict]): knowledge base values.
      process_archives (Optional[bool]): whether archive files should be
          processed.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            # dict.items() is directly iterable; the previous iter() wrapper
            # was redundant.
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        mediator = parsers_mediator.ParserMediator(storage_writer,
                                                   knowledge_base_object)

        if not extraction_worker:
            resolver_context = context.Context()

            extraction_worker = worker.EventExtractionWorker(
                resolver_context, process_archives=process_archives)

        storage_writer.Open()
        try:
            storage_writer.WriteSessionStart()

            # Process the initial path specification and every event source
            # it produced, until none remain.
            extraction_worker.ProcessPathSpec(mediator, path_spec)
            event_source = storage_writer.GetFirstWrittenEventSource()
            while event_source:
                extraction_worker.ProcessPathSpec(mediator,
                                                  event_source.path_spec)
                event_source = storage_writer.GetNextWrittenEventSource()

            storage_writer.WriteSessionCompletion()
        finally:
            # Ensure the storage writer is closed even when processing fails.
            storage_writer.Close()
Ejemplo n.º 30
0
    def _ParseFile(self, path_segments, parser, knowledge_base_object):
        """Parses a file using the parser.

    Args:
      path_segments (list[str]): path segments inside the test data directory.
      parser (BaseParser): parser.
      knowledge_base_object (KnowledgeBase): knowledge base.

    Returns:
      FakeStorageWriter: storage writer.

    Raises:
      SkipTest: if the path inside the test data directory does not exist and
          the test should be skipped.
    """
        test_session = sessions.Session()
        test_storage_writer = fake_writer.FakeStorageWriter(test_session)
        test_storage_writer.Open()

        test_mediator = parsers_mediator.ParserMediator(
            test_storage_writer, knowledge_base_object)

        file_entry = self._GetTestFileEntry(path_segments)
        test_mediator.SetFileEntry(file_entry)

        event_data_stream = events.EventDataStream()
        test_mediator.ProduceEventDataStream(event_data_stream)

        # Dispatch on the parser interface the parser implements.
        if isinstance(parser, parsers_interface.FileEntryParser):
            parser.Parse(test_mediator)

        elif isinstance(parser, parsers_interface.FileObjectParser):
            file_object = file_entry.GetFileObject()
            try:
                parser.Parse(test_mediator, file_object)
            finally:
                # Close the file object even when parsing fails.
                file_object.close()

        else:
            self.fail('Got unexpected parser type: {0!s}'.format(type(parser)))

        return test_storage_writer