Code example #1
0
    def testProcessSources(self):
        """Tests the ProcessSources function of the single process engine."""
        # Skip the test when the required test data is not available.
        artifacts_path = self._GetTestFilePath(['artifacts'])
        self._SkipIfPathNotExists(artifacts_path)

        image_path = self._GetTestFilePath(['ímynd.dd'])
        self._SkipIfPathNotExists(image_path)

        resolver_context = context.Context()
        test_engine = extraction_engine.SingleProcessEngine()

        # Address the test image via an operating system path specification
        # with a SleuthKit (TSK) file system on top of it.
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
        tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
            parent=os_path_spec)

        source_configuration = artifacts.SourceConfigurationArtifact(
            path_spec=tsk_path_spec)

        session = sessions.Session()

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = 'filestat'

        storage_writer = fake_writer.FakeStorageWriter()
        storage_writer.Open()

        try:
            test_engine.PreprocessSources(
                artifacts_path, None, [tsk_path_spec], session,
                storage_writer)

            processing_status = test_engine.ProcessSources(
                [source_configuration], storage_writer, resolver_context,
                configuration)

            # Collect the per-parser event counts the engine recorded in
            # the storage.
            parsers_counter = collections.Counter()
            for parser_count in storage_writer.GetAttributeContainers(
                    'parser_count'):
                parsers_counter[parser_count.name] = (
                    parser_count.number_of_events)

        finally:
            storage_writer.Close()

        self.assertFalse(processing_status.aborted)

        self.assertEqual(storage_writer.number_of_events, 15)
        self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
        self.assertEqual(storage_writer.number_of_recovery_warnings, 0)

        expected_parsers_counter = collections.Counter(
            {'filestat': 15, 'total': 15})
        self.assertEqual(parsers_counter, expected_parsers_counter)
Code example #2
0
File: extraction_engine.py  Project: dfjxs/plaso
    def testProcessSources(self):
        """Tests the ProcessSources function with an artifacts registry."""
        # Skip the test when the required test data is not available.
        artifacts_path = self._GetTestFilePath(['artifacts'])
        self._SkipIfPathNotExists(artifacts_path)

        image_path = self._GetTestFilePath(['ímynd.dd'])
        self._SkipIfPathNotExists(image_path)

        # Load the artifact definitions used during preprocessing.
        artifact_registry = artifacts_registry.ArtifactDefinitionsRegistry()
        artifact_reader = artifacts_reader.YamlArtifactsReader()
        artifact_registry.ReadFromDirectory(artifact_reader, artifacts_path)

        resolver_context = context.Context()
        test_engine = extraction_engine.SingleProcessEngine()

        # Address the test image via an operating system path specification
        # with a SleuthKit (TSK) file system on top of it.
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
        tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
            parent=os_path_spec)

        session = sessions.Session()

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = 'filestat'

        storage_writer = fake_writer.FakeStorageWriter()
        storage_writer.Open()

        try:
            test_engine.PreprocessSources(
                artifact_registry, [tsk_path_spec], session, storage_writer)

            test_engine.ProcessSources(
                session, [tsk_path_spec], storage_writer, resolver_context,
                configuration)

        finally:
            storage_writer.Close()

        self.assertEqual(storage_writer.number_of_events, 15)
        self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
        self.assertEqual(storage_writer.number_of_recovery_warnings, 0)

        expected_parsers_counter = collections.Counter(
            {'filestat': 15, 'total': 15})
        self.assertEqual(session.parsers_counter, expected_parsers_counter)
Code example #3
0
File: extraction_tool.py  Project: dfjxs/plaso
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extract events.

        Args:
          session (Session): session in which the sources are processed.
          storage_writer (StorageWriter): storage writer for a session
              storage.

        Returns:
          ProcessingStatus: processing status.

        Raises:
          BadConfigOption: if an invalid collection filter was specified.
        """
        # A single file source that turns out to be an archive is
        # reclassified so it can be processed as an archive.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        # Single files are processed in a single process, unless they are
        # archives that should be processed with multiple workers.
        use_single_process = self._single_process_mode
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and (
                not self._process_archives or not is_archive)):
            use_single_process = True

        if not use_single_process:
            extraction_engine = (
                multi_extraction_engine.ExtractionMultiProcessEngine(
                    number_of_worker_processes=(
                        self._number_of_extraction_workers),
                    worker_memory_limit=self._worker_memory_limit,
                    worker_timeout=self._worker_timeout))
        else:
            extraction_engine = single_extraction_engine.SingleProcessEngine()

        # Pre-processing is only run when the source is a directory or a
        # storage media image.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        # TODO: decouple session and storage writer?
        session.source_configurations = (
            extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())

        storage_writer.WriteSessionConfiguration(session)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if use_single_process:
            # Force the parser when a single non-archive file is processed
            # with exactly one enabled parser.
            number_of_parsers = len(
                configuration.parser_filter_expression.split(','))
            force_parser = bool(
                self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
                and not is_archive and number_of_parsers == 1)

            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session, self._source_path_specs, storage_writer,
                self._resolver_context, configuration,
                force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets
            # confused about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session, self._source_path_specs, storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status
Code example #4
0
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extract events.

        Args:
          session (Session): session in which the sources are processed.
          storage_writer (StorageWriter): storage writer for a session
              storage.

        Returns:
          ProcessingStatus: processing status.

        Raises:
          BadConfigOption: if an invalid collection filter was specified.
        """
        # A single file source that turns out to be an archive is
        # reclassified so it can be processed as an archive.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        # Single files are processed in a single process, unless they are
        # archives that should be processed with multiple workers.
        use_single_process = self._single_process_mode
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and (
                not self._process_archives or not is_archive)):
            use_single_process = True

        if not use_single_process:
            extraction_engine = (
                multi_extraction_engine.ExtractionMultiProcessEngine(
                    number_of_worker_processes=(
                        self._number_of_extraction_workers),
                    worker_memory_limit=self._worker_memory_limit,
                    worker_timeout=self._worker_timeout))
        else:
            extraction_engine = single_extraction_engine.SingleProcessEngine()

        # Pre-processing is only run when the source is a directory or a
        # storage media image.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        self._expanded_parser_filter_expression = (
            self._GetExpandedParserFilterExpression(
                extraction_engine.knowledge_base))

        enabled_parser_names = self._expanded_parser_filter_expression.split(
            ',')

        force_parser = False
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
                and not is_archive and len(enabled_parser_names) == 1):
            # A single non-archive file with exactly one enabled parser is
            # parsed with that parser regardless of its format signature.
            force_parser = True

            self._extract_winevt_resources = False

        elif ('winevt' not in enabled_parser_names
              and 'winevtx' not in enabled_parser_names):
            # Without a Windows EventLog parser there is no use for Windows
            # EventLog resources.
            self._extract_winevt_resources = False

        elif (self._extract_winevt_resources
              and 'pe' not in enabled_parser_names):
            logger.warning(
                'A Windows EventLog parser is enabled in combination with '
                'extraction of Windows EventLog resources, but the Portable '
                'Executable (PE) parser is disabled. Therefore Windows EventLog '
                'resources cannot be extracted.')

            self._extract_winevt_resources = False

        configuration = self._CreateExtractionProcessingConfiguration()

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        session_configuration = self._CreateExtractionSessionConfiguration(
            session, enabled_parser_names)

        storage_writer.AddAttributeContainer(session_configuration)

        source_configurations = [
            artifacts.SourceConfigurationArtifact(path_spec=path_spec)
            for path_spec in self._source_path_specs]

        # TODO: improve to detect more than 1 system configurations.
        # TODO: improve to add volumes to system configuration.
        system_configuration = (
            extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
        storage_writer.AddAttributeContainer(system_configuration)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if use_single_process:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                source_configurations, storage_writer,
                self._resolver_context, configuration,
                force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets
            # confused about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                source_configurations, storage_writer, session.identifier,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status