def _ParseFile(self, path_segments, parser, knowledge_base_object):
  """Parses a file using the parser.

  Args:
    path_segments (list[str]): path segments inside the test data directory.
    parser (BaseParser): parser.
    knowledge_base_object (KnowledgeBase): knowledge base.

  Returns:
    FakeStorageWriter: storage writer.
  """
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  storage_writer.Open()

  mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  file_entry = self._GetTestFileEntry(path_segments)
  mediator.SetFileEntry(file_entry)

  if isinstance(parser, parsers_interface.FileEntryParser):
    parser.Parse(mediator)

  elif isinstance(parser, parsers_interface.FileObjectParser):
    file_object = file_entry.GetFileObject()
    try:
      parser.Parse(mediator, file_object)
    finally:
      file_object.close()

  else:
    # Note: {0!s} instead of {0:s}, since type objects do not support the
    # 's' format specifier.
    self.fail('Got unexpected parser type: {0!s}'.format(type(parser)))

  return storage_writer

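# A minimal usage sketch for the _ParseFile helper above. The SyslogParser
# class (from plaso.parsers.syslog) and the 'syslog' test file are
# illustrative assumptions; any FileEntryParser or FileObjectParser works
# the same way.
def testParse(self):
  """Tests parsing a test file with the _ParseFile helper."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser = syslog.SyslogParser()  # assumed parser choice

  storage_writer = self._ParseFile(
      ['syslog'], parser, knowledge_base_object)

  # The FakeStorageWriter exposes counters that tests assert against.
  self.assertEqual(storage_writer.number_of_warnings, 0)
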
def testProduceEventWithEventData(self):
  """Tests the ProduceEventWithEventData method."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  storage_writer.Open()

  event_data_stream = events.EventDataStream()
  parser_mediator.ProduceEventDataStream(event_data_stream)

  date_time = fake_time.FakeTime()
  event_with_timestamp = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_WRITTEN)
  event_with_timestamp.parser = 'test_parser'

  event_data = events.EventData()
  event_data.parser = 'test_parser'

  parser_mediator.ProduceEventWithEventData(event_with_timestamp, event_data)

  self.assertEqual(storage_writer.number_of_warnings, 0)
  self.assertEqual(storage_writer.number_of_events, 1)

  event_without_timestamp = events.EventObject()
  event_without_timestamp.parser = 'test_parser'

  with self.assertRaises(errors.InvalidEvent):
    parser_mediator.ProduceEventWithEventData(
        event_without_timestamp, event_data)

def testExtractionWorkerHashing(self):
  """Test that the worker sets up and runs hashing code correctly."""
  collection_queue = single_process.SingleProcessQueue()
  storage_queue = single_process.SingleProcessQueue()
  parse_error_queue = single_process.SingleProcessQueue()
  event_queue_producer = single_process.SingleProcessItemQueueProducer(
      storage_queue)
  parse_error_queue_producer = single_process.SingleProcessItemQueueProducer(
      parse_error_queue)

  knowledge_base_object = knowledge_base.KnowledgeBase()

  parser_mediator = parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      knowledge_base_object)

  resolver_context = context.Context()
  extraction_worker = worker.BaseEventExtractionWorker(
      0, collection_queue, event_queue_producer, parse_error_queue_producer,
      parser_mediator, resolver_context=resolver_context)

  # We're going to check that the worker set up its internal state correctly.
  # pylint: disable=protected-access
  extraction_worker.SetHashers(hasher_names_string=u'md5')
  self.assertEqual(1, len(extraction_worker._hasher_names))

  extraction_worker.InitializeParserObjects()

def _CreateParserMediator(
    self, storage_writer, knowledge_base_object, file_entry=None,
    parser_chain=None):
  """Creates a parser mediator.

  Args:
    storage_writer (StorageWriter): storage writer.
    knowledge_base_object (KnowledgeBase): knowledge base.
    file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
    parser_chain (Optional[str]): parsing chain up to this point.

  Returns:
    ParserMediator: parser mediator.
  """
  parser_mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  if file_entry:
    parser_mediator.SetFileEntry(file_entry)

  if parser_chain:
    parser_mediator.parser_chain = parser_chain

  return parser_mediator

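# A minimal usage sketch for the _CreateParserMediator helper above; the
# 'test_parser' chain value is an illustrative assumption, and reading
# parser_chain back relies on the property the helper itself assigns to.
def testCreateParserMediatorWithParserChain(self):
  """Tests the _CreateParserMediator helper with a parser chain."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()

  parser_mediator = self._CreateParserMediator(
      storage_writer, knowledge_base_object, parser_chain='test_parser')
  self.assertEqual(parser_mediator.parser_chain, 'test_parser')
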
def _ParseFile(self, parser_object, path, knowledge_base_object):
  """Parses a file using the parser object.

  Args:
    parser_object: the parser object.
    path: the path of the file to parse.
    knowledge_base_object: the knowledge base object (instance of
                           KnowledgeBase).

  Returns:
    An event object queue object (instance of Queue).
  """
  event_queue = single_process.SingleProcessQueue()
  event_queue_producer = plaso_queue.ItemQueueProducer(event_queue)

  # The parse error queue must be wrapped in a producer before it is passed
  # to the parser mediator, matching the other queue-based helpers in this
  # section.
  parse_error_queue = single_process.SingleProcessQueue()
  parse_error_queue_producer = plaso_queue.ItemQueueProducer(
      parse_error_queue)

  parser_mediator = parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      knowledge_base_object)

  path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=path)
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

  parser_mediator.SetFileEntry(file_entry)

  file_object = file_entry.GetFileObject()
  try:
    parser_object.Parse(parser_mediator, file_object)
  finally:
    file_object.close()

  return event_queue

def _TestProcessPathSpec(
    self, storage_writer, path_spec, expected_event_counters,
    extraction_worker=None, knowledge_base_values=None,
    process_archives=False):
  """Tests processing a path specification.

  Args:
    storage_writer (StorageWriter): storage writer.
    path_spec (dfvfs.PathSpec): path specification.
    expected_event_counters (dict[str, int|list[int]]): expected event
        counters per event data type.
    extraction_worker (Optional[EventExtractorWorker]): worker to process
        the path specification. If None, a new worker will be created.
    knowledge_base_values (Optional[dict]): knowledge base values.
    process_archives (Optional[bool]): whether archive files should be
        processed.
  """
  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.items():
      knowledge_base_object.SetValue(identifier, value)

  resolver_context = context.Context()
  mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object,
      resolver_context=resolver_context)

  if not extraction_worker:
    configuration = configurations.ExtractionConfiguration()
    configuration.process_archives = process_archives

    extraction_worker = worker.EventExtractionWorker()
    extraction_worker.SetExtractionConfiguration(configuration)

  storage_writer.Open()

  try:
    storage_writer.WriteSessionStart()

    extraction_worker.ProcessPathSpec(mediator, path_spec)
    event_source = storage_writer.GetFirstWrittenEventSource()

    while event_source:
      extraction_worker.ProcessPathSpec(mediator, event_source.path_spec)
      event_source = storage_writer.GetNextWrittenEventSource()

    storage_writer.WriteSessionCompletion()

    if expected_event_counters:
      self.CheckEventCounters(storage_writer, expected_event_counters)

  finally:
    storage_writer.Close()

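# A minimal sketch of driving the _TestProcessPathSpec helper above. The
# 'syslog' test file, the 'syslog:line' event data type, and the expected
# count of 16 are illustrative placeholders, not verified values.
def testProcessPathSpecFile(self):
  """Tests the ProcessPathSpec function on a file."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)

  path_spec = self._GetTestFilePathSpec(['syslog'])
  expected_event_counters = {'syslog:line': 16}

  self._TestProcessPathSpec(
      storage_writer, path_spec, expected_event_counters)
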
def testGetDisplayName(self):
  """Tests the GetDisplayName function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  with self.assertRaises(ValueError):
    parser_mediator.GetDisplayName(file_entry=None)

  test_file_path = self._GetTestFilePath(['syslog.gz'])
  self._SkipIfPathNotExists(test_file_path)

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(os_path_spec)

  display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

  expected_display_name = 'OS:{0:s}'.format(test_file_path)
  self.assertEqual(display_name, expected_display_name)

  gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec)
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(gzip_path_spec)

  display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

  expected_display_name = 'GZIP:{0:s}'.format(test_file_path)
  self.assertEqual(display_name, expected_display_name)

  test_file_path = self._GetTestFilePath(['vsstest.qcow2'])
  self._SkipIfPathNotExists(test_file_path)

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  qcow_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_QCOW, parent=os_path_spec)
  vshadow_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location='/vss2',
      store_index=1, parent=qcow_path_spec)
  tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, inode=35, location='/syslog.gz',
      parent=vshadow_path_spec)

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(tsk_path_spec)

  display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

  expected_display_name = 'VSS2:TSK:/syslog.gz'
  self.assertEqual(display_name, expected_display_name)

  parser_mediator.SetTextPrepend('C:')

  display_name = parser_mediator.GetDisplayName(file_entry=file_entry)

  expected_display_name = 'VSS2:TSK:C:/syslog.gz'
  self.assertEqual(display_name, expected_display_name)

def testResetFileEntry(self):
  """Tests the ResetFileEntry function."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  parser_mediator.ResetFileEntry()

def testSetFileEntry(self):
  """Tests the SetFileEntry function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  parser_mediator.SetFileEntry(None)

def testSignalAbort(self):
  """Tests the SignalAbort function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  parser_mediator.SignalAbort()

def testSetStorageWriter(self):
  """Tests the SetStorageWriter function."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  parser_mediator.SetStorageWriter(None)

def testGetLatestYearFromFileEntry(self):
  """Tests the _GetLatestYearFromFileEntry function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  latest_year = parser_mediator._GetLatestYearFromFileEntry()
  self.assertIsNone(latest_year)

def testGetFilename(self):
  """Tests the GetFilename function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  filename = parser_mediator.GetFilename()
  self.assertIsNone(filename)

def testGetFileEntry(self):
  """Tests the GetFileEntry function."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  file_entry = parser_mediator.GetFileEntry()
  self.assertIsNone(file_entry)

def testGetEarliestYearFromFileEntry(self):
  """Tests the _GetEarliestYearFromFileEntry function."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  earliest_year = parser_mediator._GetEarliestYearFromFileEntry()
  self.assertIsNone(earliest_year)

def _CreateExtractionWorker(
    self, worker_number, filter_object=None, mount_path=None,
    process_archive_files=False, text_prepend=None):
  """Creates an extraction worker object.

  Args:
    worker_number: a number that identifies the worker.
    filter_object: optional filter object (instance of objectfilter.Filter).
    mount_path: optional string containing the mount path.
    process_archive_files: optional boolean value to indicate if the worker
                           should scan for file entries inside files.
    text_prepend: optional string that contains the text to prepend to every
                  event object.

  Returns:
    An extraction worker (instance of worker.ExtractionWorker).
  """
  parser_mediator = parsers_mediator.ParserMediator(
      self._event_queue_producer, self._parse_error_queue_producer,
      self.knowledge_base)

  resolver_context = context.Context()

  extraction_worker = SingleProcessEventExtractionWorker(
      worker_number, self._path_spec_queue, self._event_queue_producer,
      self._parse_error_queue_producer, parser_mediator,
      resolver_context=resolver_context,
      status_update_callback=self._UpdateStatus)

  # TODO: differentiate between debug output and debug mode.
  extraction_worker.SetEnableDebugMode(self._enable_debug_output)

  extraction_worker.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)

  extraction_worker.SetProcessArchiveFiles(process_archive_files)

  if filter_object:
    extraction_worker.SetFilterObject(filter_object)

  if mount_path:
    extraction_worker.SetMountPath(mount_path)

  if text_prepend:
    extraction_worker.SetTextPrepend(text_prepend)

  return extraction_worker

def testGetLatestYear(self):
  """Tests the GetLatestYear function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  expected_latest_year = parser_mediator.GetCurrentYear()
  latest_year = parser_mediator.GetLatestYear()
  self.assertEqual(latest_year, expected_latest_year)

def testGetEstimatedYear(self):
  """Tests the GetEstimatedYear function."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  expected_estimated_year = parser_mediator.GetCurrentYear()
  estimated_year = parser_mediator.GetEstimatedYear()
  self.assertEqual(estimated_year, expected_estimated_year)

def testProduceRecoveryWarning(self):
  """Tests the ProduceRecoveryWarning method."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  storage_writer.Open()

  parser_mediator.ProduceRecoveryWarning('test')

  self.assertEqual(storage_writer.number_of_events, 0)
  self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
  self.assertEqual(storage_writer.number_of_recovery_warnings, 1)

def testAnalyzeFileObject(self):
  """Tests the _AnalyzeFileObject function."""
  knowledge_base_values = {'year': 2016}
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)

  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.items():
      knowledge_base_object.SetValue(identifier, value)

  resolver_context = context.Context()
  mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object, preferred_year=2016,
      resolver_context=resolver_context)

  extraction_worker = worker.EventExtractionWorker()

  test_analyzer = analyzers_manager_test.TestAnalyzer()
  self.assertEqual(len(test_analyzer.GetResults()), 0)

  extraction_worker._analyzers = [test_analyzer]

  storage_writer.Open()
  storage_writer.WriteSessionStart()

  file_entry = self._GetTestFileEntry(['ímynd.dd'])
  mediator.SetFileEntry(file_entry)

  file_object = file_entry.GetFileObject()
  display_name = mediator.GetDisplayName()
  event_data_stream = events.EventDataStream()

  try:
    extraction_worker._AnalyzeFileObject(
        file_object, display_name, event_data_stream)
  finally:
    file_object.close()

  storage_writer.WriteSessionCompletion()
  storage_writer.Close()

  self.assertIsNotNone(event_data_stream)

  event_attribute = getattr(event_data_stream, 'test_result', None)
  self.assertEqual(event_attribute, 'is_vegetable')

def testAnalyzeDataStream(self):
  """Tests the _AnalyzeDataStream function."""
  knowledge_base_values = {'year': 2016}
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter()

  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.items():
      knowledge_base_object.SetValue(identifier, value)

  resolver_context = context.Context()
  parser_mediator = parsers_mediator.ParserMediator(
      knowledge_base_object, resolver_context=resolver_context)
  parser_mediator.SetPreferredYear(2016)
  parser_mediator.SetStorageWriter(storage_writer)

  extraction_worker = worker.EventExtractionWorker()

  test_analyzer = analyzers_manager_test.TestAnalyzer()
  self.assertEqual(len(test_analyzer.GetResults()), 0)

  extraction_worker._analyzers = [test_analyzer]

  storage_writer.Open()

  session_start = session.CreateSessionStart()
  storage_writer.AddAttributeContainer(session_start)

  file_entry = self._GetTestFileEntry(['syslog.tgz'])
  parser_mediator.SetFileEntry(file_entry)

  display_name = parser_mediator.GetDisplayName()
  event_data_stream = events.EventDataStream()

  extraction_worker._AnalyzeDataStream(
      file_entry, '', display_name, event_data_stream)

  session_completion = session.CreateSessionCompletion()
  storage_writer.AddAttributeContainer(session_completion)

  storage_writer.Close()

  self.assertIsNotNone(event_data_stream)

  event_attribute = getattr(event_data_stream, 'test_result', None)
  self.assertEqual(event_attribute, 'is_vegetable')

def _CreateParserMediator(
    self, session, storage_writer, collection_filters_helper=None,
    file_entry=None, knowledge_base_values=None, parser_chain=None,
    timezone='UTC'):
  """Creates a parser mediator.

  Args:
    session (Session): session.
    storage_writer (StorageWriter): storage writer.
    collection_filters_helper (Optional[CollectionFiltersHelper]): collection
        filters helper.
    file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
    knowledge_base_values (Optional[dict]): knowledge base values.
    parser_chain (Optional[str]): parsing chain up to this point.
    timezone (Optional[str]): timezone.

  Returns:
    ParserMediator: parser mediator.
  """
  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.items():
      if identifier == 'codepage':
        knowledge_base_object.SetCodepage(value)
      else:
        knowledge_base_object.SetValue(identifier, value)

  knowledge_base_object.SetTimeZone(timezone)

  parser_mediator = parsers_mediator.ParserMediator(
      session, storage_writer, knowledge_base_object,
      collection_filters_helper=collection_filters_helper)

  if file_entry:
    parser_mediator.SetFileEntry(file_entry)

  if parser_chain:
    parser_mediator.parser_chain = parser_chain

  return parser_mediator

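# A minimal usage sketch for the session-based _CreateParserMediator helper
# above; the codepage and timezone values are illustrative choices.
def testCreateParserMediatorWithValues(self):
  """Tests the _CreateParserMediator helper with knowledge base values."""
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)

  parser_mediator = self._CreateParserMediator(
      session, storage_writer,
      knowledge_base_values={'codepage': 'cp1252'},
      timezone='Europe/Amsterdam')
  self.assertIsNotNone(parser_mediator)
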
def testGetDisplayNameForPathSpec(self):
  """Tests the GetDisplayNameForPathSpec function."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_mediator = mediator.ParserMediator(knowledge_base_object)

  storage_writer = fake_writer.FakeStorageWriter()
  parser_mediator.SetStorageWriter(storage_writer)

  test_file_path = self._GetTestFilePath(['syslog.gz'])
  self._SkipIfPathNotExists(test_file_path)

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)

  expected_display_name = 'OS:{0:s}'.format(test_file_path)
  display_name = parser_mediator.GetDisplayNameForPathSpec(os_path_spec)
  self.assertEqual(display_name, expected_display_name)

def CreateExtractionWorker(self, worker_number):
  """Creates an extraction worker object.

  Args:
    worker_number: A number that identifies the worker.

  Returns:
    An extraction worker (instance of worker.ExtractionWorker).
  """
  parser_mediator = parsers_mediator.ParserMediator(
      self._event_queue_producer, self._parse_error_queue_producer,
      self.knowledge_base)

  resolver_context = context.Context()

  extraction_worker = SingleProcessEventExtractionWorker(
      worker_number, self._collection_queue, self._event_queue_producer,
      self._parse_error_queue_producer, parser_mediator,
      resolver_context=resolver_context)

  extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

  # TODO: move profiler in separate object.
  extraction_worker.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate)

  if self._process_archive_files:
    extraction_worker.SetProcessArchiveFiles(self._process_archive_files)

  if self._filter_object:
    extraction_worker.SetFilterObject(self._filter_object)

  if self._mount_path:
    extraction_worker.SetMountPath(self._mount_path)

  if self._text_prepend:
    extraction_worker.SetTextPrepend(self._text_prepend)

  return extraction_worker

def CreateParserMediator(self, event_queue=None):
  """Create a parser mediator object.

  Args:
    event_queue: an optional event queue object (instance of Queue).

  Returns:
    A parser mediator object (instance of parsers_mediator.ParserMediator).
  """
  if event_queue is None:
    event_queue = single_process.SingleProcessQueue()
  event_queue_producer = queue.ItemQueueProducer(event_queue)

  parse_error_queue = single_process.SingleProcessQueue()
  parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)

  return parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      self.knowledge_base_object)

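# A minimal usage sketch for the queue-based CreateParserMediator helper
# above; supplying an explicit event queue lets the caller inspect the items
# the mediator produces.
def testCreateParserMediator(self):
  """Tests the CreateParserMediator helper."""
  event_queue = single_process.SingleProcessQueue()
  parser_mediator = self.CreateParserMediator(event_queue=event_queue)
  self.assertIsNotNone(parser_mediator)
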
def _CreateParserMediator(
    self, storage_writer, artifacts_filter_helper=None, file_entry=None,
    knowledge_base_values=None, parser_chain=None, timezone='UTC'):
  """Creates a parser mediator.

  Args:
    storage_writer (StorageWriter): storage writer.
    artifacts_filter_helper (Optional[ArtifactDefinitionsFilterHelper]):
        artifacts definitions filter helper.
    file_entry (Optional[dfvfs.FileEntry]): file entry object being parsed.
    knowledge_base_values (Optional[dict]): knowledge base values.
    parser_chain (Optional[str]): parsing chain up to this point.
    timezone (str): timezone.

  Returns:
    ParserMediator: parser mediator.
  """
  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in iter(knowledge_base_values.items()):
      if identifier == 'codepage':
        knowledge_base_object.SetCodepage(value)
      else:
        knowledge_base_object.SetValue(identifier, value)

  knowledge_base_object.SetTimeZone(timezone)

  parser_mediator = mediator.ParserMediator(
      storage_writer, knowledge_base_object,
      artifacts_filter_helper=artifacts_filter_helper)

  if file_entry:
    parser_mediator.SetFileEntry(file_entry)

  if parser_chain:
    parser_mediator.parser_chain = parser_chain

  return parser_mediator

def testGetCompressedStreamTypes(self):
  """Tests the _GetCompressedStreamTypes function."""
  knowledge_base_values = {'year': 2016}
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter()

  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.items():
      knowledge_base_object.SetValue(identifier, value)

  resolver_context = context.Context()
  parser_mediator = parsers_mediator.ParserMediator(
      knowledge_base_object, resolver_context=resolver_context)
  parser_mediator.SetPreferredYear(2016)
  parser_mediator.SetStorageWriter(storage_writer)

  extraction_worker = worker.EventExtractionWorker()

  test_analyzer = analyzers_manager_test.TestAnalyzer()
  self.assertEqual(len(test_analyzer.GetResults()), 0)

  extraction_worker._analyzers = [test_analyzer]

  storage_writer.Open()

  session_start = session.CreateSessionStart()
  storage_writer.AddAttributeContainer(session_start)

  path_spec = self._GetTestFilePathSpec(['syslog.tgz'])
  type_indicators = extraction_worker._GetCompressedStreamTypes(
      parser_mediator, path_spec)
  self.assertEqual(type_indicators, [dfvfs_definitions.TYPE_INDICATOR_GZIP])

  session_completion = session.CreateSessionCompletion()
  storage_writer.AddAttributeContainer(session_completion)

  storage_writer.Close()

def CreateExtractionWorker(self, worker_number):
  """Creates an extraction worker object.

  Args:
    worker_number: A number that identifies the worker.

  Returns:
    An extraction worker (instance of worker.ExtractionWorker).
  """
  parser_mediator = parsers_mediator.ParserMediator(
      self._event_queue_producer, self._parse_error_queue_producer,
      self.knowledge_base)

  # We need a resolver context per process to prevent multi processing
  # issues with file objects stored in images.
  resolver_context = context.Context()

  extraction_worker = worker.BaseEventExtractionWorker(
      worker_number, self._collection_queue, self._event_queue_producer,
      self._parse_error_queue_producer, parser_mediator,
      resolver_context=resolver_context)

  extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

  extraction_worker.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)

  if self._process_archive_files:
    extraction_worker.SetProcessArchiveFiles(self._process_archive_files)

  if self._filter_object:
    extraction_worker.SetFilterObject(self._filter_object)

  if self._mount_path:
    extraction_worker.SetMountPath(self._mount_path)

  if self._text_prepend:
    extraction_worker.SetTextPrepend(self._text_prepend)

  return extraction_worker

def _TestProcessPathSpec(
    self, storage_writer, path_spec, extraction_worker=None,
    knowledge_base_values=None, process_archives=False):
  """Tests processing a path specification.

  Args:
    storage_writer (StorageWriter): storage writer.
    path_spec (dfvfs.PathSpec): path specification.
    extraction_worker (Optional[EventExtractorWorker]): worker to process
        the path specification. If None, a new worker will be created.
    knowledge_base_values (Optional[dict]): knowledge base values.
    process_archives (Optional[bool]): whether archive files should be
        processed.
  """
  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in iter(knowledge_base_values.items()):
      knowledge_base_object.SetValue(identifier, value)

  mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  if not extraction_worker:
    resolver_context = context.Context()
    extraction_worker = worker.EventExtractionWorker(
        resolver_context, process_archives=process_archives)

  storage_writer.Open()
  storage_writer.WriteSessionStart()

  extraction_worker.ProcessPathSpec(mediator, path_spec)
  event_source = storage_writer.GetFirstWrittenEventSource()

  while event_source:
    extraction_worker.ProcessPathSpec(mediator, event_source.path_spec)
    event_source = storage_writer.GetNextWrittenEventSource()

  storage_writer.WriteSessionCompletion()
  storage_writer.Close()

def _ParseFile(self, path_segments, parser, knowledge_base_object):
  """Parses a file using the parser.

  Args:
    path_segments (list[str]): path segments inside the test data directory.
    parser (BaseParser): parser.
    knowledge_base_object (KnowledgeBase): knowledge base.

  Returns:
    FakeStorageWriter: storage writer.

  Raises:
    SkipTest: if the path inside the test data directory does not exist and
        the test should be skipped.
  """
  session = sessions.Session()
  storage_writer = fake_writer.FakeStorageWriter(session)
  storage_writer.Open()

  parser_mediator = parsers_mediator.ParserMediator(
      storage_writer, knowledge_base_object)

  file_entry = self._GetTestFileEntry(path_segments)
  parser_mediator.SetFileEntry(file_entry)

  event_data_stream = events.EventDataStream()
  parser_mediator.ProduceEventDataStream(event_data_stream)

  if isinstance(parser, parsers_interface.FileEntryParser):
    parser.Parse(parser_mediator)

  elif isinstance(parser, parsers_interface.FileObjectParser):
    file_object = file_entry.GetFileObject()
    try:
      parser.Parse(parser_mediator, file_object)
    finally:
      file_object.close()

  else:
    self.fail('Got unexpected parser type: {0!s}'.format(type(parser)))

  return storage_writer