예제 #1
0
    def _CreateEngine(self, single_process_mode):
        """Builds the extraction engine that matches the front-end settings.

        Both engine types receive the same debug and profiling options; the
        multi process engine additionally receives the ZeroMQ setting.

        Args:
          single_process_mode (bool): True if the front-end should run in
              single process mode.

        Returns:
          BaseEngine: engine.
        """
        shared_options = {
            'debug_output': self._debug_mode,
            'enable_profiling': self._enable_profiling,
            'profiling_directory': self._profiling_directory,
            'profiling_sample_rate': self._profiling_sample_rate,
            'profiling_type': self._profiling_type}

        if not single_process_mode:
            return multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq, **shared_options)

        return single_process.SingleProcessEngine(**shared_options)
예제 #2
0
    def testProcessSources(self):
        """Tests the ProcessSources function.

        Reads the artifact definitions, preprocesses the TSK root of the test
        image and then processes it with the filestat parser filter.
        """
        artifacts_path = self._GetTestFilePath(['artifacts'])
        self._SkipIfPathNotExists(artifacts_path)

        image_path = self._GetTestFilePath(['ímynd.dd'])
        self._SkipIfPathNotExists(image_path)

        definitions_registry = artifacts_registry.ArtifactDefinitionsRegistry()
        definitions_registry.ReadFromDirectory(
            artifacts_reader.YamlArtifactsReader(), artifacts_path)

        engine_under_test = single_process.SingleProcessEngine()
        resolver_context = context.Context()
        session = sessions.Session()

        new_path_spec = path_spec_factory.Factory.NewPathSpec
        image_path_spec = new_path_spec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
        tsk_path_spec = new_path_spec(
            dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
            parent=image_path_spec)

        engine_under_test.PreprocessSources(
            definitions_registry, [tsk_path_spec])

        storage_writer = fake_writer.FakeStorageWriter(session)

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = 'filestat'

        engine_under_test.ProcessSources(
            [tsk_path_spec], storage_writer, resolver_context, configuration)

        self.assertEqual(storage_writer.number_of_events, 15)
예제 #3
0
    def testProcessSources(self):
        """Tests the ProcessSources function.

        Preprocesses the TSK root of the test image and then processes it
        with the filestat parser filter into a fake storage writer.
        """
        engine_under_test = single_process.SingleProcessEngine()
        resolver_context = context.Context()
        session = sessions.Session()

        image_path = self._GetTestFilePath([u'ímynd.dd'])

        new_path_spec = path_spec_factory.Factory.NewPathSpec
        image_path_spec = new_path_spec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
        tsk_path_spec = new_path_spec(
            dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
            parent=image_path_spec)

        engine_under_test.PreprocessSources([tsk_path_spec])

        storage_writer = fake_storage.FakeStorageWriter(session)

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = u'filestat'

        engine_under_test.ProcessSources(
            [tsk_path_spec], storage_writer, resolver_context, configuration)

        self.assertEqual(len(storage_writer.events), 15)
예제 #4
0
  def _InitializeSingleProcessModeEngine(self):
    """Creates and configures the single process mode engine.

    The engine is created with the front-end queue size and then receives
    the debug, profiling and archive settings. The filter object, mount path
    and text prepend are only applied when they are set.

    Returns:
      The engine object (instance of Engine).
    """
    single_engine = single_process.SingleProcessEngine(self._queue_size)

    single_engine.SetEnableDebugOutput(self._debug_mode)
    single_engine.SetEnableProfiling(
        self._enable_profiling, profiling_type=self._profiling_type,
        profiling_sample_rate=self._profiling_sample_rate)
    single_engine.SetProcessArchiveFiles(self._process_archive_files)

    # Optional settings are skipped when they have no value.
    if self._filter_object:
      single_engine.SetFilterObject(self._filter_object)
    if self._mount_path:
      single_engine.SetMountPath(self._mount_path)
    if self._text_prepend:
      single_engine.SetTextPrepend(self._text_prepend)

    # TODO: add support to handle multiple partitions.
    source_path_spec = self.GetSourcePathSpec()
    single_engine.SetSource(
        source_path_spec, resolver_context=self._resolver_context)

    return single_engine
예제 #5
0
  def testCreateExtractionWorker(self):
    """Tests the CreateExtractionWorker function.

    Checks that the engine creates a worker of the single process event
    extraction worker type.
    """
    test_engine = single_process.SingleProcessEngine(
        maximum_number_of_queued_items=100)

    test_extraction_worker = test_engine._CreateExtractionWorker(0)

    # Identity check against None instead of an equality comparison.
    self.assertIsNotNone(test_extraction_worker)
    self.assertIsInstance(
        test_extraction_worker,
        single_process.SingleProcessEventExtractionWorker)
예제 #6
0
  def testCreateCollector(self):
    """Tests the CreateCollector function.

    Checks that the engine creates a collector of the single process
    collector type when no filter find specifications are given.
    """
    resolver_context = context.Context()
    test_engine = single_process.SingleProcessEngine(
        maximum_number_of_queued_items=100)

    test_collector = test_engine._CreateCollector(
        filter_find_specs=None, include_directory_stat=False,
        resolver_context=resolver_context)

    # Identity check against None instead of an equality comparison.
    self.assertIsNotNone(test_collector)
    self.assertIsInstance(
        test_collector, single_process.SingleProcessCollector)
예제 #7
0
    def _CreateEngine(self, single_process_mode):
        """Instantiates the extraction engine for the requested mode.

        Args:
          single_process_mode (bool): True if the front-end should run in
              single process mode.

        Returns:
          BaseEngine: engine.
        """
        if single_process_mode:
            return single_process.SingleProcessEngine()

        # Only the multi process engine takes the ZeroMQ setting.
        return multi_process_engine.TaskMultiProcessEngine(
            use_zeromq=self._use_zeromq)
예제 #8
0
    def testEngine(self):
        """Tests the engine functionality.

        Sets a TSK path specification inside the test image as the source
        and checks the source type helpers, the file system searcher, the
        collector and the extraction worker created by the engine.
        """
        resolver_context = context.Context()
        test_engine = single_process.SingleProcessEngine(
            maximum_number_of_queued_items=25000)

        # Identity checks against None replace the equality comparisons.
        self.assertIsNotNone(test_engine)

        source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location=u'/',
            parent=os_path_spec)

        test_engine.SetSource(source_path_spec,
                              resolver_context=resolver_context)

        # The source is expected to be reported as a storage media image
        # only.
        self.assertFalse(test_engine.SourceIsDirectory())
        self.assertFalse(test_engine.SourceIsFile())
        self.assertTrue(test_engine.SourceIsStorageMediaImage())

        test_searcher = test_engine.GetSourceFileSystemSearcher(
            resolver_context=resolver_context)
        self.assertIsNotNone(test_searcher)
        self.assertIsInstance(test_searcher,
                              file_system_searcher.FileSystemSearcher)

        test_engine.PreprocessSource('Windows')

        test_collector = test_engine.CreateCollector(
            False,
            vss_stores=None,
            filter_find_specs=None,
            resolver_context=resolver_context)
        self.assertIsNotNone(test_collector)
        self.assertIsInstance(test_collector,
                              single_process.SingleProcessCollector)

        test_extraction_worker = test_engine.CreateExtractionWorker(0)
        self.assertIsNotNone(test_extraction_worker)
        self.assertIsInstance(
            test_extraction_worker,
            single_process.SingleProcessEventExtractionWorker)
예제 #9
0
  def testProcessSources(self):
    """Tests the PreprocessSource and ProcessSources function.

    Preprocesses the TSK root of the test image as a Windows source and then
    processes it with the filestat parser filter.
    """
    engine_under_test = single_process.SingleProcessEngine(
        maximum_number_of_queued_items=100)

    image_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')

    new_path_spec = path_spec_factory.Factory.NewPathSpec
    image_path_spec = new_path_spec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
    tsk_path_spec = new_path_spec(
        dfvfs_definitions.TYPE_INDICATOR_TSK,
        location=u'/',
        parent=image_path_spec)

    engine_under_test.PreprocessSource([tsk_path_spec], u'Windows')

    storage_writer = test_lib.TestStorageWriter(
        engine_under_test.event_object_queue)
    engine_under_test.ProcessSources(
        [tsk_path_spec], storage_writer, parser_filter_string=u'filestat')

    self.assertEqual(len(storage_writer.event_objects), 15)
예제 #10
0
  def testGetSourceFileSystem(self):
    """Tests the GetSourceFileSystem function.

    Checks the types of the file system and mount point returned for a TSK
    path specification inside the test image. The file system is closed
    even when a mount point assertion fails.
    """
    resolver_context = context.Context()
    test_engine = single_process.SingleProcessEngine(
        maximum_number_of_queued_items=100)

    source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=os_path_spec)

    test_file_system, test_mount_point = test_engine.GetSourceFileSystem(
        source_path_spec, resolver_context=resolver_context)

    # Validate the file system first so Close() is only called on an object
    # that was confirmed to be a file system.
    self.assertIsNotNone(test_file_system)
    self.assertIsInstance(test_file_system, file_system.FileSystem)

    try:
      self.assertIsNotNone(test_mount_point)
      self.assertIsInstance(test_mount_point, path_spec.PathSpec)
    finally:
      # Release the file system even if an assertion above failed.
      test_file_system.Close()
예제 #11
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

        This is a stripped down copy of tools/log2timeline.py that doesn't
        support the full set of flags. The defaults for these are hard coded
        in the constructor of this class.

        The method scans the source, sets up the status view, creates a
        session and a ZIP storage writer, selects the single or multi process
        engine, optionally preprocesses the sources and finally runs the
        extraction and prints a summary.

        Raises:
          SourceScannerError: if the source scanner could not find a supported
              file system.
          UserAbort: if the user initiated an abort.
        """
        self._CheckStorageFile(self._storage_file_path)

        # The source type decides both the processing mode and whether
        # preprocessing is run.
        scan_context = self.ScanSource()
        source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(self._source_path,
                                               source_type,
                                               filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write(u'\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write(u'Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            filter_file=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            session, self._storage_file_path)

        configuration = self._CreateProcessingConfiguration()

        single_process_mode = self._single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if (self._force_preprocessing
                or source_type in self._SOURCE_TYPES_TO_PREPROCESS):
            self._PreprocessSources(extraction_engine)

        # Without a configured parser filter expression, derive a preset from
        # the operating system values stored in the knowledge base.
        if not configuration.parser_filter_expression:
            operating_system = extraction_engine.knowledge_base.GetValue(
                u'operating_system')
            operating_system_product = extraction_engine.knowledge_base.GetValue(
                u'operating_system_product')
            operating_system_version = extraction_engine.knowledge_base.GetValue(
                u'operating_system_version')
            parser_filter_expression = (
                self._parsers_manager.GetPresetForOperatingSystem(
                    operating_system, operating_system_product,
                    operating_system_version))

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

            # NOTE(review): the session parser names and filter expression
            # are only set inside this branch, so they are left untouched
            # when a parser filter expression was already configured.
            # Confirm that this is intended.
            configuration.parser_filter_expression = parser_filter_expression
            session.enabled_parser_names = list(
                self._parsers_manager.GetParserAndPluginNames(
                    parser_filter_expression=configuration.
                    parser_filter_expression))
            session.parser_filter_expression = configuration.parser_filter_expression

        if session.preferred_time_zone:
            try:
                extraction_engine.knowledge_base.SetTimeZone(
                    session.preferred_time_zone)
            except ValueError:
                # An unsupported time zone is logged and extraction
                # continues with the time zone the knowledge base already
                # has.
                # pylint: disable=protected-access
                logging.warning(
                    u'Unsupported time zone: {0:s}, defaulting to {1:s}'.
                    format(session.preferred_time_zone,
                           extraction_engine.knowledge_base._time_zone.zone))

        # Find specifications are only built when a filter file is
        # configured.
        filter_find_specs = None
        if configuration.filter_file:
            environment_variables = (
                extraction_engine.knowledge_base.GetEnvironmentVariables())
            filter_find_specs = frontend_utils.BuildFindSpecsFromFile(
                configuration.filter_file,
                environment_variables=environment_variables)

        # The two engines are called with different ProcessSources
        # arguments: the single process call passes the resolver context,
        # the multi process call passes the session identifier and the
        # worker options.
        processing_status = None
        if single_process_mode:
            logging.debug(u'Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                filter_find_specs=filter_find_specs,
                status_update_callback=status_update_callback)

        else:
            logging.debug(u'Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)
예제 #12
0
    def ProcessSources(
            self,
            source_path_specs,
            source_type,
            enable_sigsegv_handler=False,
            filter_file=None,
            hasher_names_string=None,
            parser_filter_string=None,
            preferred_encoding=u'utf-8',
            single_process_mode=False,
            status_update_callback=None,
            storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF,
            timezone=pytz.UTC):
        """Processes the sources.

        Selects the single or multi process engine, preprocesses the source,
        determines the parser and hasher names, creates the storage writer
        and then runs the extraction.

        Args:
          source_path_specs: list of path specifications (instances of
                             dfvfs.PathSpec) to process.
          source_type: the dfVFS source type definition.
          enable_sigsegv_handler: optional boolean value to indicate the
                                  SIGSEGV handler should be enabled. The
                                  default is False.
          filter_file: optional path to a file that contains find
                       specifications. The default is None.
          hasher_names_string: optional comma separated string of names of
                               hashers to enable. The default is None.
          parser_filter_string: optional parser filter string. The default is
                                None.
          preferred_encoding: optional preferred encoding. The default is
                              UTF-8.
          single_process_mode: optional boolean value to indicate if the
                               front-end should run in single process mode.
                               The default is False.
          status_update_callback: optional callback function for status
                                  updates. The default is None.
          storage_serializer_format: optional storage serializer format.
                                     The default is protobuf.
          timezone: optional preferred timezone. The default is UTC.

        Returns:
          The processing status (instance of ProcessingStatus) or None, for
          example when an unexpected exception was logged in single process
          mode.

        Raises:
          SourceScannerError: if the source scanner could not find a supported
                              file system.
          UserAbort: if the user initiated an abort.
        """
        scanner_context = source_scanner.SourceScannerContext

        # If the source is a directory or a storage media image
        # run pre-processing.
        # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
        # to definitions.SOURCE_TYPE_.
        self.SetEnablePreprocessing(source_type in (
            scanner_context.SOURCE_TYPE_DIRECTORY,
            scanner_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
            scanner_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE))

        self._CheckStorageFile(self._storage_file_path)

        self._single_process_mode = single_process_mode
        # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
        # to definitions.SOURCE_TYPE_.
        if source_type == scanner_context.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            self._single_process_mode = True

        if self._single_process_mode:
            self._engine = single_process.SingleProcessEngine(self._queue_size)
        else:
            self._engine = multi_process.MultiProcessEngine(
                maximum_number_of_queued_items=self._queue_size)

        self._engine.SetEnableDebugOutput(self._debug_mode)
        self._engine.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        pre_obj = self._PreprocessSource(source_path_specs, source_type)

        self._operating_system = getattr(pre_obj, u'guessed_os', None)

        # Without an explicit parser filter fall back to a preset based on
        # the operating system determined during preprocessing.
        if not parser_filter_string:
            guessed_os = self._operating_system
            os_version = getattr(pre_obj, u'osversion', u'')
            parser_filter_string = self._GetParserFilterPreset(
                os_guess=guessed_os, os_version=os_version)

            if parser_filter_string:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_string))

        self._parser_names = [
            parser_class.NAME
            for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_string=parser_filter_string)]

        # Directory stat information is only included when the filestat
        # parser is enabled.
        include_directory_stat = u'filestat' in self._parser_names

        self._hasher_names = list(
            hashers_manager.HashersManager.GetHasherNamesFromString(
                hasher_names_string=hasher_names_string))

        self._PreprocessSetTimezone(pre_obj, timezone=timezone)

        if filter_file:
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file, pre_obj=pre_obj)
        else:
            filter_find_specs = None

        self._PreprocessSetCollectionInformation(
            pre_obj,
            source_type,
            self._engine,
            filter_file=filter_file,
            parser_filter_string=parser_filter_string,
            preferred_encoding=preferred_encoding)

        # With an output module set, the bypass storage writer is used;
        # otherwise events go to the storage file, with profiling applied to
        # that writer only.
        if self._output_module:
            storage_writer = storage.BypassStorageWriter(
                self._engine.event_object_queue,
                self._storage_file_path,
                output_module_string=self._output_module,
                pre_obj=pre_obj)
        else:
            storage_writer = storage.FileStorageWriter(
                self._engine.event_object_queue,
                self._storage_file_path,
                buffer_size=self._buffer_size,
                pre_obj=pre_obj,
                serializer_format=storage_serializer_format)

            storage_writer.SetEnableProfiling(
                self._enable_profiling, profiling_type=self._profiling_type)

        processing_status = None
        try:
            if self._single_process_mode:
                logging.debug(u'Starting extraction in single process mode.')

                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    include_directory_stat=include_directory_stat,
                    mount_path=self._mount_path,
                    parser_filter_string=parser_filter_string,
                    process_archive_files=self._process_archive_files,
                    resolver_context=self._resolver_context,
                    status_update_callback=status_update_callback,
                    text_prepend=self._text_prepend)

            else:
                logging.debug(u'Starting extraction in multi process mode.')

                # TODO: pass number_of_extraction_workers.
                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    enable_sigsegv_handler=enable_sigsegv_handler,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    include_directory_stat=include_directory_stat,
                    mount_path=self._mount_path,
                    parser_filter_string=parser_filter_string,
                    process_archive_files=self._process_archive_files,
                    status_update_callback=status_update_callback,
                    show_memory_usage=self._show_worker_memory_information,
                    text_prepend=self._text_prepend)

        except KeyboardInterrupt:
            self._CleanUpAfterAbort()
            raise errors.UserAbort

        # TODO: check if this still works and if still needed.
        except Exception as exception:
            if not self._single_process_mode:
                raise

            # The tool should generally not be run in single process mode
            # for other reasons than to debug. Hence the general error
            # catching.
            # The exception is converted with !s because formatting an
            # exception object with the "s" format code raises TypeError on
            # Python 3.4 and later, which would hide the original error.
            logging.error(
                u'An uncaught exception occurred: {0!s}.\n{1:s}'.format(
                    exception, traceback.format_exc()))
            if self._debug_mode:
                pdb.post_mortem()

        return processing_status
예제 #13
0
 def setUp(self):
   """Creates a single process engine and stores it as _test_engine."""
   engine_under_test = single_process.SingleProcessEngine(
       maximum_number_of_queued_items=100)
   self._test_engine = engine_under_test
예제 #14
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

        This is a stripped down copy of tools/log2timeline.py that doesn't
        support the full set of flags. The defaults for these are hard coded
        in the constructor of this class.

        Raises:
          BadConfigOption: if the storage file path is invalid or the storage
              format not supported or an invalid collection filter was
              specified.
          SourceScannerError: if the source scanner could not find a supported
              file system.
          UserAbort: if the user initiated an abort.
        """
        self._CheckStorageFile(
            self._storage_file_path, warn_about_existing=True)

        source_type = self.ScanSource(self._source_path).source_type

        status_view = self._status_view
        status_view.SetMode(self._status_view_mode)
        status_view.SetSourceInformation(
            self._source_path, source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        on_status_update = status_view.GetExtractionStatusUpdateCallback()

        self._output_writer.Write('\n')
        status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            message = 'Unsupported storage format: {0:s}'.format(
                self._storage_format)
            raise errors.BadConfigOption(message)

        # No need to multi process a single file source.
        single_process_mode = (
            True if source_type == dfvfs_definitions.SOURCE_TYPE_FILE
            else self._single_process_mode)

        engine_class = (
            single_process_engine.SingleProcessEngine if single_process_mode
            else multi_process_engine.TaskMultiProcessEngine)
        extraction_engine = engine_class()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        knowledge_base = extraction_engine.knowledge_base

        configuration = self._CreateProcessingConfiguration(knowledge_base)

        self._SetExtractionParsersAndPlugins(configuration, session)
        self._SetExtractionPreferredTimeZone(knowledge_base)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                knowledge_base, self._artifact_filters, self._filter_file)
        except errors.InvalidFilter as exception:
            message = (
                'Unable to build collection filters with error: '
                '{0!s}').format(exception)
            raise errors.BadConfigOption(message)

        # The two engines take different positional arguments: the single
        # process engine needs the resolver context, the multi process
        # engine the session identifier and the worker options.
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs, storage_writer,
                self._resolver_context, configuration,
                status_update_callback=on_status_update)
        else:
            logger.debug('Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier, self._source_path_specs, storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=on_status_update)

        status_view.PrintExtractionSummary(processing_status)
예제 #15
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

        Scans the source, sets up the status view, creates a session and a
        storage writer, selects the single or multi process engine,
        optionally preprocesses the sources, builds the collection filters
        and finally runs the extraction and prints a summary.

        Raises:
          BadConfigOption: if the storage file path is invalid or the storage
              format not supported or an invalid collection filter was
              specified.
          SourceScannerError: if the source scanner could not find a supported
              file system.
          UserAbort: if the user initiated an abort.
        """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        self._source_type = scan_context.source_type

        # A single file source that is an archive is reclassified as an
        # archive source.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year,
            text_prepend=self._text_prepend)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        # A single file source is processed in single process mode.
        # NOTE(review): archives were reclassified above, so is_archive looks
        # like it is always False inside this branch - confirm whether the
        # inner condition is still needed.
        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            if not self._process_archives or not is_archive:
                single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # If the source is a storage media image or device, or directory
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            # Chain explicitly so the traceback shows the invalid filter as
            # the direct cause of the configuration error.
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception)) from exception

        # The single process call additionally passes the resolver context;
        # the multi process call passes the SIGSEGV handler option.
        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets confused
            # about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)
예제 #16
0
  def ExtractEventsFromSources(self):
    """Scans the source, extracts events from it and prints a summary.

    The source is scanned to determine its type, after which a session, a
    storage writer and an extraction engine are created. A single file source
    is always processed in single process mode. When no parser filter
    expression is configured, a preset is looked up based on the operating
    system values stored in the knowledge base of the engine.

    Raises:
      BadConfigOption: if the storage file path is invalid.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    self._source_type = self.ScanSource(self._source_path).source_type

    status_view = self._status_view
    status_view.SetMode(self._status_view_mode)
    status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = status_view.GetExtractionStatusUpdateCallback()

    self._output_writer.Write('\n')
    status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    # Every storage format other than SQLite falls back to the ZIP writer.
    use_sqlite = self._storage_format == definitions.STORAGE_FORMAT_SQLITE
    writer_class = (
        storage_sqlite_file.SQLiteStorageFileWriter if use_sqlite
        else storage_zip_file.ZIPStorageFileWriter)
    storage_writer = writer_class(session, self._storage_file_path)

    # No need to multi process a single file source.
    is_file_source = self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
    single_process_mode = is_file_source or self._single_process_mode

    if not single_process_mode:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)
    else:
      extraction_engine = single_process_engine.SingleProcessEngine()

    # Pre-processing only runs for the source types listed in
    # _SOURCE_TYPES_TO_PREPROCESS.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration()

    if not configuration.parser_filter_expression:
      # Derive a parser preset from the operating system values in the
      # knowledge base.
      knowledge_base = extraction_engine.knowledge_base
      operating_system_values = [
          knowledge_base.GetValue(value_name) for value_name in (
              'operating_system', 'operating_system_product',
              'operating_system_version')]
      preset_expression = (
          parsers_manager.ParsersManager.GetPresetForOperatingSystem(
              *operating_system_values))

      if preset_expression:
        logging.info('Parser filter expression changed to: {0:s}'.format(
            preset_expression))

      configuration.parser_filter_expression = preset_expression

      session.enabled_parser_names = list(
          parsers_manager.ParsersManager.GetParserAndPluginNames(
              parser_filter_expression=preset_expression))
      session.parser_filter_expression = preset_expression

    # Note session.preferred_time_zone will default to UTC but
    # self._preferred_time_zone is None when not set.
    preferred_time_zone = self._preferred_time_zone
    if preferred_time_zone:
      try:
        extraction_engine.knowledge_base.SetTimeZone(preferred_time_zone)
      except ValueError:
        # pylint: disable=protected-access
        logging.warning(
            'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                preferred_time_zone,
                extraction_engine.knowledge_base._time_zone.zone))

    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_find_specs = filter_file.FilterFile(
          configuration.filter_file).BuildFindSpecs(
              environment_variables=environment_variables)

    if single_process_mode:
      logging.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs,
          storage_writer,
          self._resolver_context,
          configuration,
          filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logging.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier,
          self._source_path_specs,
          storage_writer,
          configuration,
          enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    status_view.PrintExtractionSummary(processing_status)
예제 #17
0
    def ProcessSources(self,
                       source_path_specs,
                       source_type,
                       command_line_arguments=None,
                       enable_sigsegv_handler=False,
                       filter_file=None,
                       hasher_names_string=None,
                       number_of_extraction_workers=0,
                       preferred_encoding=u'utf-8',
                       parser_filter_expression=None,
                       single_process_mode=False,
                       status_update_callback=None,
                       timezone=pytz.UTC):
        """Processes the sources.

        Creates the single or multi process engine, runs pre-processing,
        opens a ZIP storage writer, writes the session start and then lets
        the engine process the sources.

        Args:
          source_path_specs: list of path specifications (instances of
              dfvfs.PathSpec) to process.
          source_type: the dfVFS source type definition.
          command_line_arguments: optional string of the command line
              arguments or None if not set.
          enable_sigsegv_handler: optional boolean value to indicate the
              SIGSEGV handler should be enabled.
          filter_file: optional path to a file that contains find
              specifications.
          hasher_names_string: optional comma separated string of names of
              hashers to enable.
          number_of_extraction_workers: the number of extraction workers to
              run. If 0, the number will be selected automatically.
          preferred_encoding: optional preferred encoding.
          parser_filter_expression: optional string containing the parser
              filter expression, where None represents all parsers and
              plugins.
          single_process_mode: optional boolean value to indicate if the
              front-end should run in single process mode.
          status_update_callback: optional callback function for status
              updates.
          timezone: optional preferred timezone.

        Returns:
          The processing status (instance of ProcessingStatus) or None. None
          is returned when an exception was caught and logged in single
          process mode.

        Raises:
          SourceScannerError: if the source scanner could not find a supported
              file system.
          UserAbort: if the user initiated an abort.
        """
        # If the source is a directory or a storage media image
        # run pre-processing.
        self.SetEnablePreprocessing(source_type in (
            dfvfs_definitions.SOURCE_TYPE_DIRECTORY,
            dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
            dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE))

        self._CheckStorageFile(self._storage_file_path)

        # No need to multi process a single file source.
        self._single_process_mode = (
            source_type == dfvfs_definitions.SOURCE_TYPE_FILE or
            single_process_mode)

        if self._single_process_mode:
            self._engine = single_process.SingleProcessEngine(self._queue_size)
        else:
            self._engine = multi_process.MultiProcessEngine(
                maximum_number_of_queued_items=self._queue_size,
                use_zeromq=self._use_zeromq)

        self._engine.SetEnableDebugOutput(self._debug_mode)
        self._engine.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        pre_obj = self._PreprocessSources(source_path_specs, source_type)

        self._operating_system = getattr(pre_obj, u'guessed_os', None)

        if not parser_filter_expression:
            # Fall back to a parser preset based on what pre-processing
            # determined about the operating system.
            guessed_os = self._operating_system
            os_version = getattr(pre_obj, u'osversion', u'')
            parser_filter_expression = self._GetParserFilterPreset(
                os_guess=guessed_os, os_version=os_version)

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

        self._parser_names = [
            parser_class.NAME
            for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_expression=parser_filter_expression)]

        self._hasher_names = list(
            hashers_manager.HashersManager.GetHasherNamesFromString(
                hasher_names_string=hasher_names_string))

        self._PreprocessSetTimezone(pre_obj, timezone=timezone)

        filter_find_specs = None
        if filter_file:
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file, pre_obj=pre_obj)

        # TODO: deprecate the need for this function.
        self._PreprocessSetCollectionInformation(pre_obj)

        session_start = self._CreateSessionStart(
            command_line_arguments=command_line_arguments,
            filter_file=filter_file,
            parser_filter_expression=parser_filter_expression,
            preferred_encoding=preferred_encoding)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            self._storage_file_path, pre_obj, buffer_size=self._buffer_size)

        storage_writer.SetEnableProfiling(self._enable_profiling,
                                          profiling_type=self._profiling_type)

        storage_writer.Open()
        storage_writer.WriteSessionStart(session_start)

        processing_status = None
        try:
            if self._single_process_mode:
                logging.debug(u'Starting extraction in single process mode.')

                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    resolver_context=self._resolver_context,
                    status_update_callback=status_update_callback,
                    text_prepend=self._text_prepend)

            else:
                logging.debug(u'Starting extraction in multi process mode.')

                # TODO: pass number_of_extraction_workers.
                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    enable_sigsegv_handler=enable_sigsegv_handler,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    number_of_extraction_workers=number_of_extraction_workers,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    status_update_callback=status_update_callback,
                    show_memory_usage=self._show_worker_memory_information,
                    text_prepend=self._text_prepend)

        except KeyboardInterrupt:
            self._CleanUpAfterAbort()
            raise errors.UserAbort

        # TODO: check if this still works and if still needed.
        except Exception as exception:  # pylint: disable=broad-except
            if not self._single_process_mode:
                raise

            # The tool should generally not be run in single process mode
            # for other reasons than to debug. Hence the general error
            # catching.
            # Note that the exception is converted with !s since formatting
            # an exception object with the "s" format specification raises
            # TypeError on Python 3, which would mask the original error.
            logging.error(
                u'An uncaught exception occurred: {0!s}.\n{1:s}'.format(
                    exception, traceback.format_exc()))
            if self._debug_mode:
                pdb.post_mortem()

        return processing_status
예제 #18
0
  def ExtractEventsFromSources(self):
    """Scans the source, extracts events from it and prints a summary.

    The source is scanned to determine its type, after which a session, a
    storage writer and an extraction engine are created. A single file source
    is always processed in single process mode.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    self._source_type = self.ScanSource(self._source_path).source_type

    status_view = self._status_view
    status_view.SetMode(self._status_view_mode)
    status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = status_view.GetExtractionStatusUpdateCallback()

    self._output_writer.Write('\n')
    status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
      # The factory did not return a writer for the requested format.
      error_message = 'Unsupported storage format: {0:s}'.format(
          self._storage_format)
      raise errors.BadConfigOption(error_message)

    # No need to multi process a single file source.
    is_file_source = self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
    single_process_mode = is_file_source or self._single_process_mode

    if not single_process_mode:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)
    else:
      extraction_engine = single_process_engine.SingleProcessEngine()

    # Pre-processing only runs for the source types listed in
    # _SOURCE_TYPES_TO_PREPROCESS.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_find_specs = filter_file.FilterFile(
          configuration.filter_file).BuildFindSpecs(
              environment_variables=environment_variables)

    if single_process_mode:
      logger.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs,
          storage_writer,
          self._resolver_context,
          configuration,
          filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logger.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier,
          self._source_path_specs,
          storage_writer,
          configuration,
          enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    status_view.PrintExtractionSummary(processing_status)
예제 #19
0
  def _StartSingleThread(self, options):
    """Runs collection, extraction and storage in a single process.

    This mode is not intended for normal use. According to the original
    notes, collection is completed first, after which the worker extracts all
    event objects and keeps them in memory, and only then the storage is
    called to drain the buffer. Hence the excessive memory use of this mode,
    which makes it mostly useful for debugging sessions with limited parsing
    (and to get into the debugger).

    Args:
      options: the command line arguments (instance of argparse.Namespace).

    Raises:
      UserAbort: if processing was interrupted with a KeyboardInterrupt.
    """
    self._engine = single_process.SingleProcessEngine(self._queue_size)
    self._engine.SetEnableDebugOutput(self._debug_mode)
    self._engine.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)
    self._engine.SetProcessArchiveFiles(self._process_archive_files)

    # The optional settings are only passed to the engine when set.
    if self._filter_object:
      self._engine.SetFilterObject(self._filter_object)
    if self._mount_path:
      self._engine.SetMountPath(self._mount_path)
    if self._text_prepend:
      self._engine.SetTextPrepend(self._text_prepend)

    # TODO: add support to handle multiple partitions.
    self._engine.SetSource(
        self.GetSourcePathSpec(), resolver_context=self._resolver_context)

    logging.debug(u'Starting preprocessing.')
    pre_obj = self.PreprocessSource(options)
    logging.debug(u'Preprocessing done.')

    # TODO: make sure parsers option is not set by preprocessing.
    parser_filter_string = getattr(options, 'parsers', '')

    self._parser_names = []
    self._parser_names.extend(
        parser_class.NAME
        for _, parser_class in parsers_manager.ParsersManager.GetParsers(
            parser_filter_string=parser_filter_string))

    self._PreprocessSetCollectionInformation(options, pre_obj)

    include_directory_stat = 'filestat' in self._parser_names

    filter_find_specs = None
    filter_file = getattr(options, 'file_filter', None)
    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)

    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self._vss_stores,
        filter_find_specs=filter_find_specs,
        resolver_context=self._resolver_context)

    self._DebugPrintCollector(options)

    # Without an output module the events are written to a storage file,
    # otherwise the bypass storage writer is used.
    if not self._output_module:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=self._storage_serializer_format)
    else:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)

    hasher_names_string = getattr(options, u'hashers', u'')

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')

    finally:
      # Empty the resolver context whether processing succeeded or not.
      self._resolver_context.Empty()